{ "cells": [ { "cell_type": "markdown", "id": "00cac1f9", "metadata": {}, "source": [ "# ACRO Tests" ] }, { "cell_type": "code", "execution_count": 1, "id": "e33fd4fb", "metadata": { "tags": [] }, "outputs": [], "source": [ "import os\n", "\n", "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "c01cfe12", "metadata": { "tags": [] }, "outputs": [], "source": [ "# uncomment this line if acro is not installed\n", "# ie you are in development mode\n", "# sys.path.insert(0, os.path.abspath(\"..\"))" ] }, { "cell_type": "code", "execution_count": 3, "id": "cc8d993a", "metadata": { "scrolled": true, "tags": [] }, "outputs": [], "source": [ "from acro import ACRO, add_constant, utils" ] }, { "cell_type": "markdown", "id": "530efcfe", "metadata": {}, "source": [ "### Instantiate ACRO" ] }, { "cell_type": "code", "execution_count": 4, "id": "4b8a77e2", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:version: 0.4.8\n", "INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False, 'survival_safe_threshold': 10, 'zeros_are_disclosive': True}\n", "INFO:acro:automatic suppression: False\n" ] } ], "source": [ "acro = ACRO(suppress=False)" ] }, { "cell_type": "markdown", "id": "27a2baaa", "metadata": {}, "source": [ "### Load test data" ] }, { "cell_type": "code", "execution_count": 5, "id": "8722735f", "metadata": { "scrolled": true, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
charitygrant_typeindexyearinc_activityinc_grantsinc_donationsinc_otherinc_totaltotal_costs...sh_staff_grants_givensh_assets_grants_givensh_income_balancesh_staff_balancesh_assets_balancesh_income_assetssh_staff_assetssh_income_staff_costssh_assets_staff_costswgt
04ChildrenR1.020112880902.09603182.091404.0310947.012886435.012127472.0...NaNNaN0.0726360.1359710.7678090.0946020.1770900.5342035.6468431.0
14ChildrenR1.020146810520.018768904.058002.0401879.026039304.025493796.0...NaNNaN0.0576410.0891501.0013960.0575600.0890260.64656111.2327291.0
24ChildrenR1.020157199403.021638036.0132191.0512654.029482284.032290108.0...NaNNaN-0.049619-0.079828-0.6202100.0800040.1287110.6215837.7693651.0
34ChildrenR1.020135573013.015194731.0228844.0267156.021263744.020989048.0...NaNNaN0.0457400.0682511.0082590.0453650.0676920.67016614.7727491.0
44ChildrenR1.020102056816.07335103.0110256.0424628.09926803.09769816.0...NaNNaN0.0576960.1225320.5675390.1016600.2159010.4708624.6317491.0
\n", "

5 rows × 44 columns

\n", "
" ], "text/plain": [ " charity grant_type index year inc_activity inc_grants inc_donations \\\n", "0 4Children R 1.0 2011 2880902.0 9603182.0 91404.0 \n", "1 4Children R 1.0 2014 6810520.0 18768904.0 58002.0 \n", "2 4Children R 1.0 2015 7199403.0 21638036.0 132191.0 \n", "3 4Children R 1.0 2013 5573013.0 15194731.0 228844.0 \n", "4 4Children R 1.0 2010 2056816.0 7335103.0 110256.0 \n", "\n", " inc_other inc_total total_costs ... sh_staff_grants_given \\\n", "0 310947.0 12886435.0 12127472.0 ... NaN \n", "1 401879.0 26039304.0 25493796.0 ... NaN \n", "2 512654.0 29482284.0 32290108.0 ... NaN \n", "3 267156.0 21263744.0 20989048.0 ... NaN \n", "4 424628.0 9926803.0 9769816.0 ... NaN \n", "\n", " sh_assets_grants_given sh_income_balance sh_staff_balance \\\n", "0 NaN 0.072636 0.135971 \n", "1 NaN 0.057641 0.089150 \n", "2 NaN -0.049619 -0.079828 \n", "3 NaN 0.045740 0.068251 \n", "4 NaN 0.057696 0.122532 \n", "\n", " sh_assets_balance sh_income_assets sh_staff_assets sh_income_staff_costs \\\n", "0 0.767809 0.094602 0.177090 0.534203 \n", "1 1.001396 0.057560 0.089026 0.646561 \n", "2 -0.620210 0.080004 0.128711 0.621583 \n", "3 1.008259 0.045365 0.067692 0.670166 \n", "4 0.567539 0.101660 0.215901 0.470862 \n", "\n", " sh_assets_staff_costs wgt \n", "0 5.646843 1.0 \n", "1 11.232729 1.0 \n", "2 7.769365 1.0 \n", "3 14.772749 1.0 \n", "4 4.631749 1.0 \n", "\n", "[5 rows x 44 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = os.path.join(\"../data\", \"test_data.dta\")\n", "df = pd.read_stata(path)\n", "df.head()" ] }, { "cell_type": "markdown", "id": "4ae844a0", "metadata": {}, "source": [ "### Pandas crosstab" ] }, { "cell_type": "code", "execution_count": 6, "id": "961684cb", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivorDead in 2015Alive in 2015
grant_typeGRGNRR/G
year
20103471259248
20113471259248
20123471259248
20133471259248
20143471259248
20153471259248
\n", "
" ], "text/plain": [ "survivor Dead in 2015 Alive in 2015 \n", "grant_type G R G N R R/G\n", "year \n", "2010 3 47 12 59 24 8\n", "2011 3 47 12 59 24 8\n", "2012 3 47 12 59 24 8\n", "2013 3 47 12 59 24 8\n", "2014 3 47 12 59 24 8\n", "2015 3 47 12 59 24 8" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table = pd.crosstab(df.year, [df.survivor, df.grant_type])\n", "table" ] }, { "cell_type": "markdown", "id": "d642ed00", "metadata": {}, "source": [ "### ACRO crosstab" ] }, { "cell_type": "code", "execution_count": 7, "id": "bb4b2677", "metadata": { "scrolled": true, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "----------------------------------------------------------------|\n", "survivor |Dead_in_2015 |Alive_in_2015 |\n", "grant_type |G R |G N R R/G |\n", "year | | |\n", "----------------------------------------------------------------|\n", "2010 | threshold; ok | ok ok ok threshold; |\n", "2011 | threshold; ok | ok ok ok threshold; |\n", "2012 | threshold; ok | ok ok ok threshold; |\n", "2013 | threshold; ok | ok ok ok threshold; |\n", "2014 | threshold; ok | ok ok ok threshold; |\n", "2015 | threshold; ok | ok ok ok threshold; |\n", "----------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_0\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivorDead in 2015Alive in 2015
grant_typeGRGNRR/G
year
20103471259248
20113471259248
20123471259248
20133471259248
20143471259248
20153471259248
\n", "
" ], "text/plain": [ "survivor Dead in 2015 Alive in 2015 \n", "grant_type G R G N R R/G\n", "year \n", "2010 3 47 12 59 24 8\n", "2011 3 47 12 59 24 8\n", "2012 3 47 12 59 24 8\n", "2013 3 47 12 59 24 8\n", "2014 3 47 12 59 24 8\n", "2015 3 47 12 59 24 8" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "safe_table = acro.crosstab(df.year, [df.survivor, df.grant_type])\n", "safe_table" ] }, { "cell_type": "markdown", "id": "59b223fb-7b88-4f51-9bdf-7dbb797849d1", "metadata": { "tags": [] }, "source": [ "### Same table with the column hierarchy reversed, to check that spaces in variable names are dealt with properly" ] }, { "cell_type": "code", "execution_count": 8, "id": "d01f7437-ceee-41b3-84ad-07976e0d58c3", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "------------------------------------------------------------------------------------------------|\n", "grant_type |G |N |R |R/G |\n", "survivor |Dead_in_2015 Alive_in_2015 |Alive_in_2015 |Dead_in_2015 Alive_in_2015 |Alive_in_2015|\n", "year | | | | |\n", "------------------------------------------------------------------------------------------------|\n", "2010 | threshold; ok | ok | ok ok | threshold; |\n", "2011 | threshold; ok | ok | ok ok | threshold; |\n", "2012 | threshold; ok | ok | ok ok | threshold; |\n", "2013 | threshold; ok | ok | ok ok | threshold; |\n", "2014 | threshold; ok | ok | ok ok | threshold; |\n", "2015 | threshold; ok | ok | ok ok | threshold; |\n", "------------------------------------------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_1\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
grant_typeGNRR/G
survivorDead in 2015Alive in 2015Alive in 2015Dead in 2015Alive in 2015Alive in 2015
year
20103125947248
20113125947248
20123125947248
20133125947248
20143125947248
20153125947248
\n", "
" ], "text/plain": [ "grant_type G N R \\\n", "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", "year \n", "2010 3 12 59 47 \n", "2011 3 12 59 47 \n", "2012 3 12 59 47 \n", "2013 3 12 59 47 \n", "2014 3 12 59 47 \n", "2015 3 12 59 47 \n", "\n", "grant_type R/G \n", "survivor Alive in 2015 Alive in 2015 \n", "year \n", "2010 24 8 \n", "2011 24 8 \n", "2012 24 8 \n", "2013 24 8 \n", "2014 24 8 \n", "2015 24 8 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "safe_table = acro.crosstab(df.year, [df.grant_type, df.survivor])\n", "safe_table" ] }, { "cell_type": "markdown", "id": "1c34d5ba-8200-4181-9440-ca02f4bfd2f4", "metadata": {}, "source": [ "### checking for testing purposes" ] }, { "cell_type": "code", "execution_count": 9, "id": "e4382b14-cfcf-4d01-a25a-97106852bd65", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 4 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "-------------------------------------|\n", "survivor |Dead_in_2015 |Alive_in_2015|\n", "year | | |\n", "-------------------------------------|\n", "2010 | threshold; | threshold; |\n", "2011 | threshold; | threshold; |\n", "-------------------------------------|\n", "\n", "INFO:acro:records:add(): output_2\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivorDead in 2015Alive in 2015
year
201022
201122
\n", "
" ], "text/plain": [ "survivor Dead in 2015 Alive in 2015\n", "year \n", "2010 2 2\n", "2011 2 2" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mydata = df[(df[\"charity\"].str[0] == \"W\")]\n", "mydata = mydata[mydata[\"year\"] < 2012]\n", "acro.crosstab(mydata.year, mydata.survivor)" ] }, { "cell_type": "markdown", "id": "6d4730c4", "metadata": {}, "source": [ "### ACRO crosstab with suppression" ] }, { "cell_type": "code", "execution_count": 10, "id": "37ddb939", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", "INFO:acro:outcome_df:\n", "---------------------------------------------------------------------------|\n", "grant_type |G |N |R |R/G |\n", "year | | | | |\n", "---------------------------------------------------------------------------|\n", "2010 | ok | threshold; p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", "2011 | ok | ok | ok | threshold; |\n", "2012 | ok | ok | ok | threshold; |\n", "2013 | ok | ok | ok | threshold; |\n", "2014 | ok | ok | ok | threshold; |\n", "2015 | ok | ok | ok | threshold; |\n", "---------------------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_3\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
grant_typeGNRR/G
year
20109921906.0NaN8402284.0NaN
20118502246.0124013.8593757716880.0NaN
201211458580.0131859.0625006958050.5NaN
201313557147.0147937.7968757202273.5NaN
201413748147.0133198.2500008277525.0NaN
201511133433.0146572.18750010812888.0NaN
\n", "
" ], "text/plain": [ "grant_type G N R R/G\n", "year \n", "2010 9921906.0 NaN 8402284.0 NaN\n", "2011 8502246.0 124013.859375 7716880.0 NaN\n", "2012 11458580.0 131859.062500 6958050.5 NaN\n", "2013 13557147.0 147937.796875 7202273.5 NaN\n", "2014 13748147.0 133198.250000 8277525.0 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "acro.suppress = True\n", "\n", "safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", "safe_table" ] }, { "cell_type": "markdown", "id": "0c695e09", "metadata": {}, "source": [ "### ACRO crosstab with suppression and totals" ] }, { "cell_type": "code", "execution_count": 11, "id": "ef42beb6", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\n", "INFO:acro:get_summary(): fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "------------------------------------------------------------------------------------------------------------------------------------------------|\n", "grant_type |G |N |R |R/G |All|\n", "survivor |Dead_in_2015 Alive_in_2015 |Alive_in_2015 |Dead_in_2015 Alive_in_2015 |Alive_in_2015 | |\n", "year | | | | | |\n", "------------------------------------------------------------------------------------------------------------------------------------------------|\n", "2010 | threshold; p-ratio; nk-rule; ok | threshold; p-ratio; | ok ok | threshold; p-ratio; nk-rule; | ok|\n", "2011 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", "2012 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", "2013 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", "2014 | threshold; p-ratio; nk-rule; ok | ok | ok ok | 
threshold; | ok|\n", "2015 | threshold; p-ratio; nk-rule; threshold; | ok | ok ok | threshold; | ok|\n", "All | ok ok | ok | ok ok | ok | ok|\n", "------------------------------------------------------------------------------------------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_4\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "grant_type G N R \\\n", "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", "year \n", "2010 2 12 5 40 \n", "2011 3 12 58 45 \n", "2012 3 12 59 45 \n", "2013 3 12 59 47 \n", "2014 3 12 59 43 \n", "2015 3 9 58 28 \n", "All 17 69 298 248 \n", "\n", "grant_type R/G All \n", "survivor Alive in 2015 Alive in 2015 \n", "year \n", "2010 20 4 83 \n", "2011 24 8 150 \n", "2012 24 8 151 \n", "2013 24 8 153 \n", "2014 24 8 149 \n", "2015 23 8 129 \n", "All 139 44 815 \n" ] } ], "source": [ "acro.suppress = False\n", "table = acro.crosstab(\n", " df.year,\n", " [df.grant_type, df.survivor],\n", " values=df.inc_grants,\n", " aggfunc=\"count\",\n", " margins=True,\n", ")\n", "print(table)" ] }, { "cell_type": "code", "execution_count": 12, "id": "506135e0", "metadata": { "tags": [] }, "outputs": [], "source": [ "acro.suppress = False" ] }, { "cell_type": "markdown", "id": "8b603548", "metadata": {}, "source": [ "### ACRO crosstab with aggregation function" ] }, { "cell_type": "code", "execution_count": 13, "id": "83718cb1", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): pass\n", "INFO:acro:outcome_df:\n", "-------------------------------------|\n", "survivor |Dead_in_2015 |Alive_in_2015|\n", "year | | |\n", "-------------------------------------|\n", "2010 | ok | ok |\n", "2011 | ok | ok |\n", "2012 | ok | ok |\n", "2013 | ok | ok |\n", "2014 | ok | ok |\n", "2015 | ok | ok |\n", "-------------------------------------|\n", "\n", "INFO:acro:records:add(): output_5\n" ] }, { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivorDead in 2015Alive in 2015
year
20101320337.75015466672.0
20111295468.0007190086.5
20121270522.1257119017.5
20131325315.5007682584.0
20141282249.6258276287.5
20151608412.2508060488.5
\n", "
" ], "text/plain": [ "survivor Dead in 2015 Alive in 2015\n", "year \n", "2010 1320337.750 15466672.0\n", "2011 1295468.000 7190086.5\n", "2012 1270522.125 7119017.5\n", "2013 1325315.500 7682584.0\n", "2014 1282249.625 8276287.5\n", "2015 1608412.250 8060488.5" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "safe_table = acro.crosstab(df.year, df.survivor, values=df.inc_grants, aggfunc=\"mean\")\n", "safe_table" ] }, { "cell_type": "markdown", "id": "de4266cd-b4d4-417b-ae44-5d972e8bfdde", "metadata": {}, "source": [ "### ACRO crosstab with multiple aggregation functions and totals" ] }, { "cell_type": "code", "execution_count": 14, "id": "fb7abfc9-e428-4b71-9066-01ac9a08d655", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 14 cells may need suppressing; p-ratio: 4 cells may need suppressing; nk-rule: 2 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "----------------------------------------------------------------------------------------------------------------------------------------------|\n", " mean |std |\n", "grant_type G N R R/G All |G N R R/G All|\n", "year | |\n", "----------------------------------------------------------------------------------------------------------------------------------------------|\n", "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok | ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok|\n", "2011 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", "2012 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", "2013 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", "2014 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", "2015 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", "All ok ok ok ok ok | ok ok ok ok ok|\n", 
"----------------------------------------------------------------------------------------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_6\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
grant_typeGNRR/GAllGNRR/GAll
year
20109921906.00.0000008402284.011636000.08308286.51.855055e+070.0000003.059557e+071.701088e+072.727398e+07
20118502246.0124013.8593757716880.016047500.05303808.51.688595e+07205959.4929032.954322e+071.561638e+072.137658e+07
201211458580.0131859.0625006958050.516810000.05259893.52.061090e+07210476.5391752.721184e+071.646449e+072.026400e+07
201313557147.0147937.7968757202273.516765625.05605045.52.486844e+07203747.4170172.989833e+071.671112e+072.251787e+07
201413748147.0133198.2500008277525.017845750.06117054.53.134559e+07181865.9255803.546348e+071.741251e+072.641722e+07
201511133433.0146572.18750010812888.018278624.06509989.52.553919e+07201602.8008324.130935e+071.730471e+072.784636e+07
All11412787.0134431.8906258098502.016648273.05997796.52.283220e+07198873.7266563.204495e+071.583532e+072.405324e+07
\n", "
" ], "text/plain": [ " mean \\\n", "grant_type G N R R/G All \n", "year \n", "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.5 \n", "2011 8502246.0 124013.859375 7716880.0 16047500.0 5303808.5 \n", "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.5 \n", "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", "2014 13748147.0 133198.250000 8277525.0 17845750.0 6117054.5 \n", "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", "\n", " std \\\n", "grant_type G N R R/G \n", "year \n", "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", "\n", " \n", "grant_type All \n", "year \n", "2010 2.727398e+07 \n", "2011 2.137658e+07 \n", "2012 2.026400e+07 \n", "2013 2.251787e+07 \n", "2014 2.641722e+07 \n", "2015 2.784636e+07 \n", "All 2.405324e+07 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "safe_table = acro.crosstab(\n", " df.year, df.grant_type, values=df.inc_grants, aggfunc=[\"mean\", \"std\"], margins=True\n", ")\n", "safe_table" ] }, { "cell_type": "markdown", "id": "0aa99fbf", "metadata": {}, "source": [ "### ACRO crosstab with missing values\n", "- This is an example of a crosstab where there are missing values that have not been filled or dealt with in the data.\n", "- Note that you need to change the value of the CHECK_MISSING_VALUES variable in the acro object to True. 
Then run the crosstab command.\n", "- In this example, ten values in the column inc_grants were set to NaN to represent missing data.\n", "- In this version of acro, checking the disclosiveness of an output with missing values is not supported.\n", "- The status of the command will be \"review\" to indicate that the output needs to be reviewed by an output checker to decide whether it is disclosive.\n", "- In the outcome_df each cell that contains one or more missing values will be shown as missing.\n", "- The output will not be suppressed even if suppress=True." ] }, { "cell_type": "code", "execution_count": 15, "id": "bf132239", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "--------------------------------------------------------------------------------|\n", "grant_type |G |N |R |R/G |All|\n", "year | | | | | |\n", "--------------------------------------------------------------------------------|\n", "2010 | ok | threshold; p-ratio; | ok | threshold; p-ratio; nk-rule; | ok|\n", "2011 | ok | ok | ok | threshold; | ok|\n", "2012 | ok | ok | ok | threshold; | ok|\n", "2013 | ok | ok | ok | threshold; | ok|\n", "2014 | ok | ok | ok | threshold; | ok|\n", "2015 | ok | ok | ok | threshold; | ok|\n", "All | ok | ok | ok | ok | ok|\n", "--------------------------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_7\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
grant_typeGNRR/GAll
year
20109921906.00.0000008420373.011636000.08320154.5
20118502246.0125663.2265627689140.016047500.05310392.0
201211458580.0131859.0625006896304.016810000.05220580.5
201313557147.0150488.4531257088095.516765625.05578657.0
201413748147.0135494.7812508118565.517845750.06072600.0
201511133433.0149143.62500010596385.018278624.06442131.0
All11412787.0136158.8593758006361.016648273.05968295.5
\n", "
" ], "text/plain": [ "grant_type G N R R/G All\n", "year \n", "2010 9921906.0 0.000000 8420373.0 11636000.0 8320154.5\n", "2011 8502246.0 125663.226562 7689140.0 16047500.0 5310392.0\n", "2012 11458580.0 131859.062500 6896304.0 16810000.0 5220580.5\n", "2013 13557147.0 150488.453125 7088095.5 16765625.0 5578657.0\n", "2014 13748147.0 135494.781250 8118565.5 17845750.0 6072600.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", "All 11412787.0 136158.859375 8006361.0 16648273.0 5968295.5" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "utils.CHECK_MISSING_VALUES = True\n", "\n", "missing = df.inc_grants.copy()\n", "missing[0:10] = np.nan\n", "\n", "safe_table = acro.crosstab(\n", " df.year, df.grant_type, values=missing, aggfunc=\"mean\", margins=True\n", ")\n", "safe_table" ] }, { "cell_type": "code", "execution_count": 16, "id": "7cc417a0", "metadata": { "tags": [] }, "outputs": [], "source": [ "utils.CHECK_MISSING_VALUES = False" ] }, { "cell_type": "markdown", "id": "fcc81e98", "metadata": {}, "source": [ "### ACRO crosstab with negative values" ] }, { "cell_type": "code", "execution_count": 17, "id": "15bcdc7c", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): review; negative values found\n", "INFO:acro:outcome_df:\n", "----------------------------------------|\n", "grant_type |G |N |R |R/G|\n", "year | | | | |\n", "----------------------------------------|\n", "2010 | | | negative | |\n", "2011 | | negative | negative | |\n", "2012 | | | negative | |\n", "2013 | | negative | negative | |\n", "2014 | | negative | negative | |\n", "2015 | | negative | negative | |\n", "----------------------------------------|\n", "\n", "INFO:acro:records:add(): output_8\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
grant_typeGNRR/G
year
20109921906.00.0000008280032.511636000.0
20118502246.0123496.4453127577703.516047500.0
201211458580.0131859.0625006796357.516810000.0
201313557147.0147937.6250006988263.016765625.0
201413748147.0133198.0781257997392.017845750.0
201511133433.0146572.01562510388612.018278624.0
\n", "
" ], "text/plain": [ "grant_type G N R R/G\n", "year \n", "2010 9921906.0 0.000000 8280032.5 11636000.0\n", "2011 8502246.0 123496.445312 7577703.5 16047500.0\n", "2012 11458580.0 131859.062500 6796357.5 16810000.0\n", "2013 13557147.0 147937.625000 6988263.0 16765625.0\n", "2014 13748147.0 133198.078125 7997392.0 17845750.0\n", "2015 11133433.0 146572.015625 10388612.0 18278624.0" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "negative = df.inc_grants.copy()\n", "negative[0:10] = -10\n", "\n", "safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\"mean\")\n", "safe_table" ] }, { "cell_type": "markdown", "id": "d66e565b", "metadata": {}, "source": [ "### ACRO pivot_table" ] }, { "cell_type": "code", "execution_count": 18, "id": "b13b5f7e", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", "--------------------------------------------------------------------------------------------------------------|\n", " inc_grants |\n", "year 2010 2011 2012 2013 2014 2015 All|\n", "grant_type |\n", "--------------------------------------------------------------------------------------------------------------|\n", "G ok ok ok ok ok ok ok|\n", "N threshold; p-ratio; ok ok ok ok ok ok|\n", "R ok ok ok ok ok ok ok|\n", "R/G threshold; p-ratio; nk-rule; threshold; threshold; threshold; threshold; threshold; ok|\n", "All ok ok ok ok ok ok ok|\n", "--------------------------------------------------------------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_9\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
inc_grants
year201020112012201320142015All
grant_type
G138906688.0127533696.0171878704.0203357200.0206222208.0133601200.09.814997e+08
N0.07192804.07779685.08728330.07858697.08501187.04.006070e+07
R504137056.0532464704.0480105472.0511361408.0554594176.0551457280.03.134120e+09
R/G46544000.0128380000.0134480000.0134125000.0142766000.0146228992.07.325240e+08
All689587776.0795571264.0794243904.0857571968.0911441088.0839788672.04.888204e+09
\n", "
" ], "text/plain": [ " inc_grants \\\n", "year 2010 2011 2012 2013 2014 \n", "grant_type \n", "G 138906688.0 127533696.0 171878704.0 203357200.0 206222208.0 \n", "N 0.0 7192804.0 7779685.0 8728330.0 7858697.0 \n", "R 504137056.0 532464704.0 480105472.0 511361408.0 554594176.0 \n", "R/G 46544000.0 128380000.0 134480000.0 134125000.0 142766000.0 \n", "All 689587776.0 795571264.0 794243904.0 857571968.0 911441088.0 \n", "\n", " \n", "year 2015 All \n", "grant_type \n", "G 133601200.0 9.814997e+08 \n", "N 8501187.0 4.006070e+07 \n", "R 551457280.0 3.134120e+09 \n", "R/G 146228992.0 7.325240e+08 \n", "All 839788672.0 4.888204e+09 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table = acro.pivot_table(\n", " df,\n", " index=[\"grant_type\"],\n", " columns=[\"year\"],\n", " values=[\"inc_grants\"],\n", " margins=True,\n", " aggfunc=\"sum\",\n", ")\n", "table" ] }, { "cell_type": "code", "execution_count": 19, "id": "f72162c8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): pass\n", "INFO:acro:outcome_df:\n", "---------------------------------|\n", " mean |std |\n", " inc_grants |inc_grants|\n", "grant_type | |\n", "---------------------------------|\n", "G ok | ok |\n", "N ok | ok |\n", "R ok | ok |\n", "R/G ok | ok |\n", "---------------------------------|\n", "\n", "INFO:acro:records:add(): output_10\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
inc_grantsinc_grants
grant_type
G1.141279e+072.283220e+07
N1.344319e+051.988737e+05
R8.098502e+063.204495e+07
R/G1.664827e+071.583532e+07
\n", "
" ], "text/plain": [ " mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 1.583532e+07" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table = acro.pivot_table(\n", " df, index=[\"grant_type\"], values=[\"inc_grants\"], aggfunc=[\"mean\", \"std\"]\n", ")\n", "table" ] }, { "cell_type": "markdown", "id": "dc99fa71", "metadata": {}, "source": [ "### ACRO pivot_table with missing values" ] }, { "cell_type": "code", "execution_count": 20, "id": "f3a87c20", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): pass\n", "INFO:acro:outcome_df:\n", "---------------------------------|\n", " mean |std |\n", " inc_grants |inc_grants|\n", "grant_type | |\n", "---------------------------------|\n", "G ok | ok |\n", "N ok | ok |\n", "R ok | ok |\n", "R/G ok | ok |\n", "---------------------------------|\n", "\n", "INFO:acro:records:add(): output_11\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
inc_grantsinc_grants
grant_type
G1.141279e+072.283220e+07
N1.364700e+051.999335e+05
R8.006361e+063.228216e+07
R/G1.664827e+071.583532e+07
\n", "
" ], "text/plain": [ " mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.364700e+05 1.999335e+05\n", "R 8.006361e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "utils.CHECK_MISSING_VALUES = True\n", "\n", "df.loc[0:10, \"inc_grants\"] = np.nan\n", "\n", "table = acro.pivot_table(\n", " df, index=[\"grant_type\"], values=[\"inc_grants\"], aggfunc=[\"mean\", \"std\"]\n", ")\n", "table" ] }, { "cell_type": "code", "execution_count": 21, "id": "8cabd236", "metadata": {}, "outputs": [], "source": [ "utils.CHECK_MISSING_VALUES = False" ] }, { "cell_type": "markdown", "id": "b1f77749", "metadata": {}, "source": [ "### ACRO pivot_table with negative values" ] }, { "cell_type": "code", "execution_count": 22, "id": "864d39f4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:get_summary(): review; negative values found\n", "INFO:acro:outcome_df:\n", "---------------------------------|\n", " mean |std |\n", " inc_grants |inc_grants|\n", "grant_type | |\n", "---------------------------------|\n", "G | |\n", "N negative | negative |\n", "R negative | negative |\n", "R/G | |\n", "---------------------------------|\n", "\n", "INFO:acro:records:add(): output_12\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
inc_grantsinc_grants
grant_type
G1.141279e+072.283220e+07
N1.341800e+051.990196e+05
R7.882230e+063.204558e+07
R/G1.664827e+071.583532e+07
\n", "
" ], "text/plain": [ " mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.341800e+05 1.990196e+05\n", "R 7.882230e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[0:10, \"inc_grants\"] = -10\n", "\n", "table = acro.pivot_table(\n", " df, index=[\"grant_type\"], values=[\"inc_grants\"], aggfunc=[\"mean\", \"std\"]\n", ")\n", "table" ] }, { "cell_type": "markdown", "id": "45ec04ef", "metadata": {}, "source": [ "### ACRO OLS" ] }, { "cell_type": "code", "execution_count": 23, "id": "d0724d08-6969-4f0a-8a32-e00d253f3597", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n", "INFO:acro:records:add(): output_13\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: inc_activity R-squared: 0.894
Model: OLS Adj. R-squared: 0.893
Method: Least Squares F-statistic: 2261.
Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.00
Time: 19:43:34 Log-Likelihood: -14495.
No. Observations: 811 AIC: 2.900e+04
Df Residuals: 807 BIC: 2.902e+04
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
const 3.01e+05 5.33e+05 0.565 0.572 -7.45e+05 1.35e+06
inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836
inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633
total_costs 0.8313 0.011 78.674 0.000 0.811 0.852
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 1339.956 Durbin-Watson: 1.414
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1253317.706
Skew: 9.899 Prob(JB): 0.00
Kurtosis: 194.566 Cond. No. 1.05e+08


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+08. This might indicate that there are
strong multicollinearity or other numerical problems." ], "text/latex": [ "\\begin{center}\n", "\\begin{tabular}{lclc}\n", "\\toprule\n", "\\textbf{Dep. Variable:} & inc\\_activity & \\textbf{ R-squared: } & 0.894 \\\\\n", "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.893 \\\\\n", "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 2261. \\\\\n", "\\textbf{Date:} & Thu, 06 Mar 2025 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n", "\\textbf{Time:} & 19:43:34 & \\textbf{ Log-Likelihood: } & -14495. \\\\\n", "\\textbf{No. Observations:} & 811 & \\textbf{ AIC: } & 2.900e+04 \\\\\n", "\\textbf{Df Residuals:} & 807 & \\textbf{ BIC: } & 2.902e+04 \\\\\n", "\\textbf{Df Model:} & 3 & \\textbf{ } & \\\\\n", "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\\begin{tabular}{lcccccc}\n", " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", "\\midrule\n", "\\textbf{const} & 3.01e+05 & 5.33e+05 & 0.565 & 0.572 & -7.45e+05 & 1.35e+06 \\\\\n", "\\textbf{inc\\_grants} & -0.8846 & 0.025 & -35.956 & 0.000 & -0.933 & -0.836 \\\\\n", "\\textbf{inc\\_donations} & -0.6647 & 0.016 & -40.721 & 0.000 & -0.697 & -0.633 \\\\\n", "\\textbf{total\\_costs} & 0.8313 & 0.011 & 78.674 & 0.000 & 0.811 & 0.852 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\\begin{tabular}{lclc}\n", "\\textbf{Omnibus:} & 1339.956 & \\textbf{ Durbin-Watson: } & 1.414 \\\\\n", "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 1253317.706 \\\\\n", "\\textbf{Skew:} & 9.899 & \\textbf{ Prob(JB): } & 0.00 \\\\\n", "\\textbf{Kurtosis:} & 194.566 & \\textbf{ Cond. No. } & 1.05e+08 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "%\\caption{OLS Regression Results}\n", "\\end{center}\n", "\n", "Notes: \\newline\n", " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. 
\\newline\n", " [2] The condition number is large, 1.05e+08. This might indicate that there are \\newline\n", " strong multicollinearity or other numerical problems." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", "Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.00\n", "Time: 19:43:34 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "=================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "---------------------------------------------------------------------------------\n", "const 3.01e+05 5.33e+05 0.565 0.572 -7.45e+05 1.35e+06\n", "inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836\n", "inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633\n", "total_costs 0.8313 0.011 78.674 0.000 0.811 0.852\n", "==============================================================================\n", "Omnibus: 1339.956 Durbin-Watson: 1.414\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1253317.706\n", "Skew: 9.899 Prob(JB): 0.00\n", "Kurtosis: 194.566 Cond. No. 1.05e+08\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 1.05e+08. 
This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df = df[[\"inc_activity\", \"inc_grants\", \"inc_donations\", \"total_costs\"]]\n", "new_df = new_df.dropna()\n", "\n", "y = new_df[\"inc_activity\"]\n", "x = new_df[[\"inc_grants\", \"inc_donations\", \"total_costs\"]]\n", "x = add_constant(x)\n", "\n", "results = acro.ols(y, x)\n", "results.summary()" ] }, { "cell_type": "markdown", "id": "0c826271", "metadata": {}, "source": [ "### ACRO OLSR" ] }, { "cell_type": "code", "execution_count": 24, "id": "cc90f7c9", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n", "INFO:acro:records:add(): output_14\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: inc_activity R-squared: 0.894
Model: OLS Adj. R-squared: 0.893
Method: Least Squares F-statistic: 2261.
Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.00
Time: 19:43:34 Log-Likelihood: -14495.
No. Observations: 811 AIC: 2.900e+04
Df Residuals: 807 BIC: 2.902e+04
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 3.01e+05 5.33e+05 0.565 0.572 -7.45e+05 1.35e+06
inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836
inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633
total_costs 0.8313 0.011 78.674 0.000 0.811 0.852
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 1339.956 Durbin-Watson: 1.414
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1253317.706
Skew: 9.899 Prob(JB): 0.00
Kurtosis: 194.566 Cond. No. 1.05e+08


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+08. This might indicate that there are
strong multicollinearity or other numerical problems." ], "text/latex": [ "\\begin{center}\n", "\\begin{tabular}{lclc}\n", "\\toprule\n", "\\textbf{Dep. Variable:} & inc\\_activity & \\textbf{ R-squared: } & 0.894 \\\\\n", "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.893 \\\\\n", "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 2261. \\\\\n", "\\textbf{Date:} & Thu, 06 Mar 2025 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n", "\\textbf{Time:} & 19:43:34 & \\textbf{ Log-Likelihood: } & -14495. \\\\\n", "\\textbf{No. Observations:} & 811 & \\textbf{ AIC: } & 2.900e+04 \\\\\n", "\\textbf{Df Residuals:} & 807 & \\textbf{ BIC: } & 2.902e+04 \\\\\n", "\\textbf{Df Model:} & 3 & \\textbf{ } & \\\\\n", "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\\begin{tabular}{lcccccc}\n", " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", "\\midrule\n", "\\textbf{Intercept} & 3.01e+05 & 5.33e+05 & 0.565 & 0.572 & -7.45e+05 & 1.35e+06 \\\\\n", "\\textbf{inc\\_grants} & -0.8846 & 0.025 & -35.956 & 0.000 & -0.933 & -0.836 \\\\\n", "\\textbf{inc\\_donations} & -0.6647 & 0.016 & -40.721 & 0.000 & -0.697 & -0.633 \\\\\n", "\\textbf{total\\_costs} & 0.8313 & 0.011 & 78.674 & 0.000 & 0.811 & 0.852 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\\begin{tabular}{lclc}\n", "\\textbf{Omnibus:} & 1339.956 & \\textbf{ Durbin-Watson: } & 1.414 \\\\\n", "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 1253317.706 \\\\\n", "\\textbf{Skew:} & 9.899 & \\textbf{ Prob(JB): } & 0.00 \\\\\n", "\\textbf{Kurtosis:} & 194.566 & \\textbf{ Cond. No. } & 1.05e+08 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "%\\caption{OLS Regression Results}\n", "\\end{center}\n", "\n", "Notes: \\newline\n", " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. 
\\newline\n", " [2] The condition number is large, 1.05e+08. This might indicate that there are \\newline\n", " strong multicollinearity or other numerical problems." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", "Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.00\n", "Time: 19:43:34 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "=================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "---------------------------------------------------------------------------------\n", "Intercept 3.01e+05 5.33e+05 0.565 0.572 -7.45e+05 1.35e+06\n", "inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836\n", "inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633\n", "total_costs 0.8313 0.011 78.674 0.000 0.811 0.852\n", "==============================================================================\n", "Omnibus: 1339.956 Durbin-Watson: 1.414\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1253317.706\n", "Skew: 9.899 Prob(JB): 0.00\n", "Kurtosis: 194.566 Cond. No. 1.05e+08\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 1.05e+08. 
This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = acro.olsr(\n", " formula=\"inc_activity ~ inc_grants + inc_donations + total_costs\", data=new_df\n", ")\n", "results.summary()" ] }, { "cell_type": "markdown", "id": "0c489203", "metadata": {}, "source": [ "### ACRO Probit" ] }, { "cell_type": "code", "execution_count": 25, "id": "5b1a1611", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n", "INFO:acro:records:add(): output_15\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Optimization terminated successfully.\n", " Current function value: 0.493791\n", " Iterations 10\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Probit Regression Results
Dep. Variable: survivor No. Observations: 811
Model: Probit Df Residuals: 806
Method: MLE Df Model: 4
Date: Thu, 06 Mar 2025 Pseudo R-squ.: 0.2140
Time: 19:43:34 Log-Likelihood: -400.46
converged: True LL-Null: -509.50
Covariance Type: nonrobust LLR p-value: 4.875e-46
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [0.025 0.975]
const 0.0474 0.057 0.838 0.402 -0.063 0.158
inc_activity 1.836e-07 5.16e-08 3.559 0.000 8.25e-08 2.85e-07
inc_grants 8.576e-08 3.9e-08 2.197 0.028 9.25e-09 1.62e-07
inc_donations 2.406e-07 4.54e-08 5.297 0.000 1.52e-07 3.3e-07
total_costs -8.644e-08 3.68e-08 -2.351 0.019 -1.59e-07 -1.44e-08


Possibly complete quasi-separation: A fraction 0.18 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified." ], "text/latex": [ "\\begin{center}\n", "\\begin{tabular}{lclc}\n", "\\toprule\n", "\\textbf{Dep. Variable:} & survivor & \\textbf{ No. Observations: } & 811 \\\\\n", "\\textbf{Model:} & Probit & \\textbf{ Df Residuals: } & 806 \\\\\n", "\\textbf{Method:} & MLE & \\textbf{ Df Model: } & 4 \\\\\n", "\\textbf{Date:} & Thu, 06 Mar 2025 & \\textbf{ Pseudo R-squ.: } & 0.2140 \\\\\n", "\\textbf{Time:} & 19:43:34 & \\textbf{ Log-Likelihood: } & -400.46 \\\\\n", "\\textbf{converged:} & True & \\textbf{ LL-Null: } & -509.50 \\\\\n", "\\textbf{Covariance Type:} & nonrobust & \\textbf{ LLR p-value: } & 4.875e-46 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\\begin{tabular}{lcccccc}\n", " & \\textbf{coef} & \\textbf{std err} & \\textbf{z} & \\textbf{P$> |$z$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", "\\midrule\n", "\\textbf{const} & 0.0474 & 0.057 & 0.838 & 0.402 & -0.063 & 0.158 \\\\\n", "\\textbf{inc\\_activity} & 1.836e-07 & 5.16e-08 & 3.559 & 0.000 & 8.25e-08 & 2.85e-07 \\\\\n", "\\textbf{inc\\_grants} & 8.576e-08 & 3.9e-08 & 2.197 & 0.028 & 9.25e-09 & 1.62e-07 \\\\\n", "\\textbf{inc\\_donations} & 2.406e-07 & 4.54e-08 & 5.297 & 0.000 & 1.52e-07 & 3.3e-07 \\\\\n", "\\textbf{total\\_costs} & -8.644e-08 & 3.68e-08 & -2.351 & 0.019 & -1.59e-07 & -1.44e-08 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "%\\caption{Probit Regression Results}\n", "\\end{center}\n", "\n", "Possibly complete quasi-separation: A fraction 0.18 of observations can be \\newline\n", " perfectly predicted. This might indicate that there is complete \\newline\n", " quasi-separation. In this case some parameters will not be identified." ], "text/plain": [ "\n", "\"\"\"\n", " Probit Regression Results \n", "==============================================================================\n", "Dep. Variable: survivor No. 
Observations: 811\n", "Model: Probit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", "Date: Thu, 06 Mar 2025 Pseudo R-squ.: 0.2140\n", "Time: 19:43:34 Log-Likelihood: -400.46\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.875e-46\n", "=================================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------\n", "const 0.0474 0.057 0.838 0.402 -0.063 0.158\n", "inc_activity 1.836e-07 5.16e-08 3.559 0.000 8.25e-08 2.85e-07\n", "inc_grants 8.576e-08 3.9e-08 2.197 0.028 9.25e-09 1.62e-07\n", "inc_donations 2.406e-07 4.54e-08 5.297 0.000 1.52e-07 3.3e-07\n", "total_costs -8.644e-08 3.68e-08 -2.351 0.019 -1.59e-07 -1.44e-08\n", "=================================================================================\n", "\n", "Possibly complete quasi-separation: A fraction 0.18 of observations can be\n", "perfectly predicted. This might indicate that there is complete\n", "quasi-separation. 
In this case some parameters will not be identified.\n", "\"\"\"" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df = df[[\"survivor\", \"inc_activity\", \"inc_grants\", \"inc_donations\", \"total_costs\"]]\n", "new_df = new_df.dropna()\n", "\n", "y = new_df[\"survivor\"].astype(\"category\").cat.codes # numeric\n", "y.name = \"survivor\"\n", "x = new_df[[\"inc_activity\", \"inc_grants\", \"inc_donations\", \"total_costs\"]]\n", "x = add_constant(x)\n", "\n", "results = acro.probit(y, x)\n", "results.summary()" ] }, { "cell_type": "markdown", "id": "22efa3df", "metadata": {}, "source": [ "### ACRO Logit" ] }, { "cell_type": "code", "execution_count": 26, "id": "dcf30f8f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n", "INFO:acro:records:add(): output_16\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Optimization terminated successfully.\n", " Current function value: 0.490836\n", " Iterations 12\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Logit Regression Results
Dep. Variable: survivor No. Observations: 811
Model: Logit Df Residuals: 806
Method: MLE Df Model: 4
Date: Thu, 06 Mar 2025 Pseudo R-squ.: 0.2187
Time: 19:43:35 Log-Likelihood: -398.07
converged: True LL-Null: -509.50
Covariance Type: nonrobust LLR p-value: 4.532e-47
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [0.025 0.975]
const 0.0512 0.091 0.561 0.575 -0.128 0.230
inc_activity 2.981e-07 8.95e-08 3.330 0.001 1.23e-07 4.74e-07
inc_grants 1.351e-07 6.67e-08 2.026 0.043 4.39e-09 2.66e-07
inc_donations 5.123e-07 1.04e-07 4.927 0.000 3.08e-07 7.16e-07
total_costs -1.442e-07 6.26e-08 -2.304 0.021 -2.67e-07 -2.15e-08


Possibly complete quasi-separation: A fraction 0.18 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified." ], "text/latex": [ "\\begin{center}\n", "\\begin{tabular}{lclc}\n", "\\toprule\n", "\\textbf{Dep. Variable:} & survivor & \\textbf{ No. Observations: } & 811 \\\\\n", "\\textbf{Model:} & Logit & \\textbf{ Df Residuals: } & 806 \\\\\n", "\\textbf{Method:} & MLE & \\textbf{ Df Model: } & 4 \\\\\n", "\\textbf{Date:} & Thu, 06 Mar 2025 & \\textbf{ Pseudo R-squ.: } & 0.2187 \\\\\n", "\\textbf{Time:} & 19:43:35 & \\textbf{ Log-Likelihood: } & -398.07 \\\\\n", "\\textbf{converged:} & True & \\textbf{ LL-Null: } & -509.50 \\\\\n", "\\textbf{Covariance Type:} & nonrobust & \\textbf{ LLR p-value: } & 4.532e-47 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\\begin{tabular}{lcccccc}\n", " & \\textbf{coef} & \\textbf{std err} & \\textbf{z} & \\textbf{P$> |$z$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", "\\midrule\n", "\\textbf{const} & 0.0512 & 0.091 & 0.561 & 0.575 & -0.128 & 0.230 \\\\\n", "\\textbf{inc\\_activity} & 2.981e-07 & 8.95e-08 & 3.330 & 0.001 & 1.23e-07 & 4.74e-07 \\\\\n", "\\textbf{inc\\_grants} & 1.351e-07 & 6.67e-08 & 2.026 & 0.043 & 4.39e-09 & 2.66e-07 \\\\\n", "\\textbf{inc\\_donations} & 5.123e-07 & 1.04e-07 & 4.927 & 0.000 & 3.08e-07 & 7.16e-07 \\\\\n", "\\textbf{total\\_costs} & -1.442e-07 & 6.26e-08 & -2.304 & 0.021 & -2.67e-07 & -2.15e-08 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "%\\caption{Logit Regression Results}\n", "\\end{center}\n", "\n", "Possibly complete quasi-separation: A fraction 0.18 of observations can be \\newline\n", " perfectly predicted. This might indicate that there is complete \\newline\n", " quasi-separation. In this case some parameters will not be identified." ], "text/plain": [ "\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: survivor No. 
Observations: 811\n", "Model: Logit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", "Date: Thu, 06 Mar 2025 Pseudo R-squ.: 0.2187\n", "Time: 19:43:35 Log-Likelihood: -398.07\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.532e-47\n", "=================================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------\n", "const 0.0512 0.091 0.561 0.575 -0.128 0.230\n", "inc_activity 2.981e-07 8.95e-08 3.330 0.001 1.23e-07 4.74e-07\n", "inc_grants 1.351e-07 6.67e-08 2.026 0.043 4.39e-09 2.66e-07\n", "inc_donations 5.123e-07 1.04e-07 4.927 0.000 3.08e-07 7.16e-07\n", "total_costs -1.442e-07 6.26e-08 -2.304 0.021 -2.67e-07 -2.15e-08\n", "=================================================================================\n", "\n", "Possibly complete quasi-separation: A fraction 0.18 of observations can be\n", "perfectly predicted. This might indicate that there is complete\n", "quasi-separation. In this case some parameters will not be identified.\n", "\"\"\"" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = acro.logit(y, x)\n", "results.summary()" ] }, { "cell_type": "markdown", "id": "3631a59d", "metadata": {}, "source": [ "### ACRO Histogram without suppression" ] }, { "cell_type": "code", "execution_count": 27, "id": "af2f4313", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:status: fail\n", "INFO:acro:records:add(): output_17\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "acro_artifacts/histogram_0.png\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "hist = acro.hist(df, \"inc_grants\")\n", "print(hist)" ] }, { "cell_type": "markdown", "id": "5faf9a98", "metadata": {}, "source": [ "### ACRO Histogram with suppression" ] }, { "cell_type": "code", "execution_count": 28, "id": "349d8a29", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:acro:Histogram will not be shown as the inc_grants column is disclosive.\n", "INFO:acro:status: fail\n", "INFO:acro:records:add(): output_18\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "acro.suppress = True\n", "hist = acro.hist(df, \"inc_grants\")" ] }, { "cell_type": "code", "execution_count": 29, "id": "ab0fe892", "metadata": {}, "outputs": [], "source": [ "acro.suppress = False" ] }, { "cell_type": "markdown", "id": "589fedc6", "metadata": {}, "source": [ "### List current ACRO outputs" ] }, { "cell_type": "code", "execution_count": 30, "id": "ec960039", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "uid: output_0\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 12, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 5]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(df.year, [df.survivor, df.grant_type])\n", "summary: fail; threshold: 12 cells may need suppressing; \n", "outcome: survivor Dead_in_2015 Alive_in_2015 \n", "grant_type G R G N R R/G\n", "year \n", "2010 threshold; ok ok ok ok threshold; \n", "2011 threshold; ok ok ok ok threshold; \n", "2012 threshold; ok ok ok ok threshold; \n", "2013 threshold; ok ok ok ok threshold; \n", "2014 threshold; ok ok ok ok threshold; \n", "2015 threshold; ok ok ok ok threshold; \n", "output: [survivor Dead in 2015 Alive in 2015 \n", "grant_type G R G N R R/G\n", "year \n", "2010 3 47 12 59 24 8\n", "2011 3 47 12 59 24 8\n", "2012 3 47 12 59 24 8\n", "2013 3 47 12 59 24 8\n", "2014 3 47 12 59 24 8\n", "2015 3 47 12 59 24 8]\n", "timestamp: 2025-03-06T19:43:34.383332\n", "comments: []\n", "exception: \n", "\n", "uid: output_1\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 
0, 'threshold': 12, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 5]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(df.year, [df.grant_type, df.survivor])\n", "summary: fail; threshold: 12 cells may need suppressing; \n", "outcome: grant_type G N R \\\n", "survivor Dead_in_2015 Alive_in_2015 Alive_in_2015 Dead_in_2015 \n", "year \n", "2010 threshold; ok ok ok \n", "2011 threshold; ok ok ok \n", "2012 threshold; ok ok ok \n", "2013 threshold; ok ok ok \n", "2014 threshold; ok ok ok \n", "2015 threshold; ok ok ok \n", "\n", "grant_type R/G \n", "survivor Alive_in_2015 Alive_in_2015 \n", "year \n", "2010 ok threshold; \n", "2011 ok threshold; \n", "2012 ok threshold; \n", "2013 ok threshold; \n", "2014 ok threshold; \n", "2015 ok threshold; \n", "output: [grant_type G N R \\\n", "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", "year \n", "2010 3 12 59 47 \n", "2011 3 12 59 47 \n", "2012 3 12 59 47 \n", "2013 3 12 59 47 \n", "2014 3 12 59 47 \n", "2015 3 12 59 47 \n", "\n", "grant_type R/G \n", "survivor Alive in 2015 Alive in 2015 \n", "year \n", "2010 24 8 \n", "2011 24 8 \n", "2012 24 8 \n", "2013 24 8 \n", "2014 24 8 \n", "2015 24 8 ]\n", "timestamp: 2025-03-06T19:43:34.405157\n", "comments: []\n", "exception: \n", "\n", "uid: output_2\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 1], [1, 0], [1, 1]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: acro.crosstab(mydata.year, mydata.survivor)\n", "summary: fail; threshold: 4 cells may need suppressing; \n", "outcome: survivor 
Dead_in_2015 Alive_in_2015\n", "year \n", "2010 threshold; threshold; \n", "2011 threshold; threshold; \n", "output: [survivor Dead in 2015 Alive in 2015\n", "year \n", "2010 2 2\n", "2011 2 2]\n", "timestamp: 2025-03-06T19:43:34.423743\n", "comments: []\n", "exception: \n", "\n", "uid: output_3\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", "outcome: grant_type G N R R/G\n", "year \n", "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; \n", "2011 ok ok ok threshold; \n", "2012 ok ok ok threshold; \n", "2013 ok ok ok threshold; \n", "2014 ok ok ok threshold; \n", "2015 ok ok ok threshold; \n", "output: [grant_type G N R R/G\n", "year \n", "2010 9921906.0 NaN 8402284.0 NaN\n", "2011 8502246.0 124013.859375 7716880.0 NaN\n", "2012 11458580.0 131859.062500 6958050.5 NaN\n", "2013 13557147.0 147937.796875 7202273.5 NaN\n", "2014 13748147.0 133198.250000 8277525.0 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN]\n", "timestamp: 2025-03-06T19:43:34.454789\n", "comments: []\n", "exception: \n", "\n", "uid: output_4\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], 
[5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'all-values-are-same': []}}\n", "command: table = acro.crosstab(\n", "summary: fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n", "outcome: grant_type G N \\\n", "survivor Dead_in_2015 Alive_in_2015 Alive_in_2015 \n", "year \n", "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n", "2011 threshold; p-ratio; nk-rule; ok ok \n", "2012 threshold; p-ratio; nk-rule; ok ok \n", "2013 threshold; p-ratio; nk-rule; ok ok \n", "2014 threshold; p-ratio; nk-rule; ok ok \n", "2015 threshold; p-ratio; nk-rule; threshold; ok \n", "All ok ok ok \n", "\n", "grant_type R R/G All \n", "survivor Dead_in_2015 Alive_in_2015 Alive_in_2015 \n", "year \n", "2010 ok ok threshold; p-ratio; nk-rule; ok \n", "2011 ok ok threshold; ok \n", "2012 ok ok threshold; ok \n", "2013 ok ok threshold; ok \n", "2014 ok ok threshold; ok \n", "2015 ok ok threshold; ok \n", "All ok ok ok ok \n", "output: [grant_type G N R \\\n", "survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \n", "year \n", "2010 2 12 5 40 \n", "2011 3 12 58 45 \n", "2012 3 12 59 45 \n", "2013 3 12 59 47 \n", "2014 3 12 59 43 \n", "2015 3 9 58 28 \n", "All 17 69 298 248 \n", "\n", "grant_type R/G All \n", "survivor Alive in 2015 Alive in 2015 \n", "year \n", "2010 20 4 83 \n", "2011 24 8 150 \n", "2012 24 8 151 \n", "2013 24 8 153 \n", "2014 24 8 149 \n", "2015 23 8 129 \n", "All 139 44 815 ]\n", "timestamp: 2025-03-06T19:43:34.523720\n", "comments: [\"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.\"]\n", "exception: \n", "\n", "uid: output_5\n", "status: pass\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 
0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(df.year, df.survivor, values=df.inc_grants, aggfunc=\"mean\")\n", "summary: pass\n", "outcome: survivor Dead_in_2015 Alive_in_2015\n", "year \n", "2010 ok ok\n", "2011 ok ok\n", "2012 ok ok\n", "2013 ok ok\n", "2014 ok ok\n", "2015 ok ok\n", "output: [survivor Dead in 2015 Alive in 2015\n", "year \n", "2010 1320337.750 15466672.0\n", "2011 1295468.000 7190086.5\n", "2012 1270522.125 7119017.5\n", "2013 1325315.500 7682584.0\n", "2014 1282249.625 8276287.5\n", "2015 1608412.250 8060488.5]\n", "timestamp: 2025-03-06T19:43:34.555960\n", "comments: []\n", "exception: \n", "\n", "uid: output_6\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 4, 'nk-rule': 2, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [0, 6], [0, 8], [1, 3], [1, 8], [2, 3], [2, 8], [3, 3], [3, 8], [4, 3], [4, 8], [5, 3], [5, 8]], 'p-ratio': [[0, 1], [0, 3], [0, 6], [0, 8]], 'nk-rule': [[0, 3], [0, 8]], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(\n", "summary: fail; threshold: 14 cells may need suppressing; p-ratio: 4 cells may need suppressing; nk-rule: 2 cells may need suppressing; \n", "outcome: mean \\\n", "grant_type G N R R/G All \n", "year \n", "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok \n", "2011 ok ok ok threshold; ok \n", "2012 ok ok ok threshold; ok \n", "2013 ok ok ok threshold; ok \n", "2014 ok ok ok threshold; ok \n", "2015 ok ok ok threshold; ok \n", "All ok ok ok ok ok \n", "\n", " std \n", "grant_type G N R R/G All \n", "year \n", "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok \n", "2011 ok ok ok threshold; ok \n", "2012 ok ok ok threshold; ok \n", "2013 ok ok ok threshold; ok \n", "2014 
ok ok ok threshold; ok \n", "2015 ok ok ok threshold; ok \n", "All ok ok ok ok ok \n", "output: [ mean \\\n", "grant_type G N R R/G All \n", "year \n", "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.5 \n", "2011 8502246.0 124013.859375 7716880.0 16047500.0 5303808.5 \n", "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.5 \n", "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", "2014 13748147.0 133198.250000 8277525.0 17845750.0 6117054.5 \n", "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", "\n", " std \\\n", "grant_type G N R R/G \n", "year \n", "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", "\n", " \n", "grant_type All \n", "year \n", "2010 2.727398e+07 \n", "2011 2.137658e+07 \n", "2012 2.026400e+07 \n", "2013 2.251787e+07 \n", "2014 2.641722e+07 \n", "2015 2.784636e+07 \n", "All 2.405324e+07 ]\n", "timestamp: 2025-03-06T19:43:34.639249\n", "comments: []\n", "exception: \n", "\n", "uid: output_7\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(\n", "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may 
need suppressing; \n", "outcome: grant_type G N R R/G All\n", "year \n", "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok\n", "2011 ok ok ok threshold; ok\n", "2012 ok ok ok threshold; ok\n", "2013 ok ok ok threshold; ok\n", "2014 ok ok ok threshold; ok\n", "2015 ok ok ok threshold; ok\n", "All ok ok ok ok ok\n", "output: [grant_type G N R R/G All\n", "year \n", "2010 9921906.0 0.000000 8420373.0 11636000.0 8320154.5\n", "2011 8502246.0 125663.226562 7689140.0 16047500.0 5310392.0\n", "2012 11458580.0 131859.062500 6896304.0 16810000.0 5220580.5\n", "2013 13557147.0 150488.453125 7088095.5 16765625.0 5578657.0\n", "2014 13748147.0 135494.781250 8118565.5 17845750.0 6072600.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", "All 11412787.0 136158.859375 8006361.0 16648273.0 5968295.5]\n", "timestamp: 2025-03-06T19:43:34.695759\n", "comments: []\n", "exception: \n", "\n", "uid: output_8\n", "status: review\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 10, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [[0, 2], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 1], [5, 2]], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\"mean\")\n", "summary: review; negative values found\n", "outcome: grant_type G N R R/G\n", "year \n", "2010 negative \n", "2011 negative negative \n", "2012 negative \n", "2013 negative negative \n", "2014 negative negative \n", "2015 negative negative \n", "output: [grant_type G N R R/G\n", "year \n", "2010 9921906.0 0.000000 8280032.5 11636000.0\n", "2011 8502246.0 123496.445312 7577703.5 16047500.0\n", "2012 11458580.0 131859.062500 6796357.5 16810000.0\n", "2013 13557147.0 
147937.625000 6988263.0 16765625.0\n", "2014 13748147.0 133198.078125 7997392.0 17845750.0\n", "2015 11133433.0 146572.015625 10388612.0 18278624.0]\n", "timestamp: 2025-03-06T19:43:34.727227\n", "comments: []\n", "exception: \n", "\n", "uid: output_9\n", "status: fail\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[1, 0], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5]], 'p-ratio': [[1, 0], [3, 0]], 'nk-rule': [[3, 0]], 'all-values-are-same': []}}\n", "command: table = acro.pivot_table(\n", "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "outcome: inc_grants \\\n", "year 2010 2011 2012 \n", "grant_type \n", "G ok ok ok \n", "N threshold; p-ratio; ok ok \n", "R ok ok ok \n", "R/G threshold; p-ratio; nk-rule; threshold; threshold; \n", "All ok ok ok \n", "\n", " \n", "year 2013 2014 2015 All \n", "grant_type \n", "G ok ok ok ok \n", "N ok ok ok ok \n", "R ok ok ok ok \n", "R/G threshold; threshold; threshold; ok \n", "All ok ok ok ok \n", "output: [ inc_grants \\\n", "year 2010 2011 2012 2013 2014 \n", "grant_type \n", "G 138906688.0 127533696.0 171878704.0 203357200.0 206222208.0 \n", "N 0.0 7192804.0 7779685.0 8728330.0 7858697.0 \n", "R 504137056.0 532464704.0 480105472.0 511361408.0 554594176.0 \n", "R/G 46544000.0 128380000.0 134480000.0 134125000.0 142766000.0 \n", "All 689587776.0 795571264.0 794243904.0 857571968.0 911441088.0 \n", "\n", " \n", "year 2015 All \n", "grant_type \n", "G 133601200.0 9.814997e+08 \n", "N 8501187.0 4.006070e+07 \n", "R 551457280.0 3.134120e+09 \n", "R/G 146228992.0 7.325240e+08 \n", "All 839788672.0 4.888204e+09 ]\n", "timestamp: 2025-03-06T19:43:34.782873\n", "comments: []\n", "exception: \n", "\n", "uid: output_10\n", "status: 
pass\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: table = acro.pivot_table(\n", "summary: pass\n", "outcome: mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G ok ok\n", "N ok ok\n", "R ok ok\n", "R/G ok ok\n", "output: [ mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", "timestamp: 2025-03-06T19:43:34.814054\n", "comments: []\n", "exception: \n", "\n", "uid: output_11\n", "status: pass\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: table = acro.pivot_table(\n", "summary: pass\n", "outcome: mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G ok ok\n", "N ok ok\n", "R ok ok\n", "R/G ok ok\n", "output: [ mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.364700e+05 1.999335e+05\n", "R 8.006361e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", "timestamp: 2025-03-06T19:43:34.844023\n", "comments: []\n", "exception: \n", "\n", "uid: output_12\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 
'all-values-are-same': []}}\n", "command: table = acro.pivot_table(\n", "summary: review; negative values found\n", "outcome: mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G \n", "N negative negative\n", "R negative negative\n", "R/G \n", "output: [ mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.341800e+05 1.990196e+05\n", "R 7.882230e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", "timestamp: 2025-03-06T19:43:34.872024\n", "comments: []\n", "exception: \n", "\n", "uid: output_13\n", "status: pass\n", "type: regression\n", "properties: {'method': 'ols', 'dof': 807.0}\n", "sdc: {}\n", "command: results = acro.ols(y, x)\n", "summary: pass; dof=807.0 >= 10\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: [ inc_activity R-squared: 0.894\n", "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", "Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.000\n", "Time: 19:43:34 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", "Covariance Type: nonrobust NaN NaN, coef std err t P>|t| [0.025 0.975]\n", "const 301000.0000 533000.000 0.565 0.572 -745000.000 1350000.000\n", "inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836\n", "inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633\n", "total_costs 0.8313 0.011 78.674 0.000 0.811 0.852, 1339.956 Durbin-Watson: 1.414\n", "Omnibus: \n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 
1.050000e+08]\n", "timestamp: 2025-03-06T19:43:34.926753\n", "comments: []\n", "exception: \n", "\n", "uid: output_14\n", "status: pass\n", "type: regression\n", "properties: {'method': 'olsr', 'dof': 807.0}\n", "sdc: {}\n", "command: results = acro.olsr(\n", "summary: pass; dof=807.0 >= 10\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: [ inc_activity R-squared: 0.894\n", "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", "Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.000\n", "Time: 19:43:34 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", "Covariance Type: nonrobust NaN NaN, coef std err t P>|t| [0.025 0.975]\n", "Intercept 301000.0000 533000.000 0.565 0.572 -745000.000 1350000.000\n", "inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836\n", "inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633\n", "total_costs 0.8313 0.011 78.674 0.000 0.811 0.852, 1339.956 Durbin-Watson: 1.414\n", "Omnibus: \n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", "timestamp: 2025-03-06T19:43:34.954407\n", "comments: []\n", "exception: \n", "\n", "uid: output_15\n", "status: pass\n", "type: regression\n", "properties: {'method': 'probit', 'dof': 806.0}\n", "sdc: {}\n", "command: results = acro.probit(y, x)\n", "summary: pass; dof=806.0 >= 10\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: [ survivor No. Observations: 811\n", "Dep. 
Variable: \n", "Model: Probit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", "Date: Thu, 06 Mar 2025 Pseudo R-squ.: 2.140000e-01\n", "Time: 19:43:34 Log-Likelihood: -4.004600e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n", "const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n", "inc_activity 1.836000e-07 5.160000e-08 3.559 0.000 8.250000e-08 \n", "inc_grants 8.576000e-08 3.900000e-08 2.197 0.028 9.250000e-09 \n", "inc_donations 2.406000e-07 4.540000e-08 5.297 0.000 1.520000e-07 \n", "total_costs -8.644000e-08 3.680000e-08 -2.351 0.019 -1.590000e-07 \n", "\n", " 0.975] \n", "const 1.580000e-01 \n", "inc_activity 2.850000e-07 \n", "inc_grants 1.620000e-07 \n", "inc_donations 3.300000e-07 \n", "total_costs -1.440000e-08 ]\n", "timestamp: 2025-03-06T19:43:34.982272\n", "comments: []\n", "exception: \n", "\n", "uid: output_16\n", "status: pass\n", "type: regression\n", "properties: {'method': 'logit', 'dof': 806.0}\n", "sdc: {}\n", "command: results = acro.logit(y, x)\n", "summary: pass; dof=806.0 >= 10\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: [ survivor No. Observations: 811\n", "Dep. 
Variable: \n", "Model: Logit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", "Date: Thu, 06 Mar 2025 Pseudo R-squ.: 2.187000e-01\n", "Time: 19:43:35 Log-Likelihood: -3.980700e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n", "const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n", "inc_activity 2.981000e-07 8.950000e-08 3.330 0.001 1.230000e-07 \n", "inc_grants 1.351000e-07 6.670000e-08 2.026 0.043 4.390000e-09 \n", "inc_donations 5.123000e-07 1.040000e-07 4.927 0.000 3.080000e-07 \n", "total_costs -1.442000e-07 6.260000e-08 -2.304 0.021 -2.670000e-07 \n", "\n", " 0.975] \n", "const 2.300000e-01 \n", "inc_activity 4.740000e-07 \n", "inc_grants 2.660000e-07 \n", "inc_donations 7.160000e-07 \n", "total_costs -2.150000e-08 ]\n", "timestamp: 2025-03-06T19:43:35.004433\n", "comments: []\n", "exception: \n", "\n", "uid: output_17\n", "status: fail\n", "type: histogram\n", "properties: {'method': 'histogram'}\n", "sdc: {}\n", "command: hist = acro.hist(df, \"inc_grants\")\n", "summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. The maximum value of the inc_grants column is: 249327008.0\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: ['acro_artifacts/histogram_0.png']\n", "timestamp: 2025-03-06T19:43:35.149884\n", "comments: []\n", "exception: \n", "\n", "uid: output_18\n", "status: fail\n", "type: histogram\n", "properties: {'method': 'histogram'}\n", "sdc: {}\n", "command: hist = acro.hist(df, \"inc_grants\")\n", "summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. 
The maximum value of the inc_grants column is: 249327008.0\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: ['acro_artifacts/histogram_1.png']\n", "timestamp: 2025-03-06T19:43:35.204307\n", "comments: []\n", "exception: \n", "\n", "\n" ] } ], "source": [ "results_str = acro.print_outputs()" ] }, { "cell_type": "markdown", "id": "f78b5a08", "metadata": {}, "source": [ "### Remove some ACRO outputs before finalising" ] }, { "cell_type": "code", "execution_count": 31, "id": "6211a9cf", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:remove(): output_1 removed\n", "INFO:acro:records:remove(): output_4 removed\n" ] } ], "source": [ "acro.remove_output(\"output_1\")\n", "acro.remove_output(\"output_4\")" ] }, { "cell_type": "markdown", "id": "df2a02e0", "metadata": {}, "source": [ "### Rename ACRO outputs before finalising" ] }, { "cell_type": "code", "execution_count": 32, "id": "c9864a29", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:rename_output(): output_2 renamed to pivot_table\n" ] } ], "source": [ "acro.rename_output(\"output_2\", \"pivot_table\")" ] }, { "cell_type": "markdown", "id": "56d2b6a1", "metadata": {}, "source": [ "### Add a comment to output" ] }, { "cell_type": "code", "execution_count": 33, "id": "b392be9f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:a comment was added to output_0\n", "INFO:acro:records:a comment was added to output_0\n" ] } ], "source": [ "acro.add_comments(\"output_0\", \"This is a cross table between year and grant_type\")\n", "acro.add_comments(\"output_0\", \"6 cells were suppressed in this table\")" ] }, { "cell_type": "markdown", "id": "8496fed4", "metadata": {}, "source": [ "### Add an unsupported output to the list of outputs" ] }, { "cell_type": "code", "execution_count": 34, "id": "2816eac7", "metadata": {}, "outputs": [ { 
"name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:add_custom(): output_19\n" ] } ], "source": [ "acro.custom_output(\n", " \"XandY.jpeg\", \"This output is an image showing the relationship between X and Y\"\n", ")" ] }, { "cell_type": "markdown", "id": "6efde761", "metadata": {}, "source": [ "### Request an exception for some of the outputs" ] }, { "cell_type": "code", "execution_count": 35, "id": "f38b4334", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:exception request was added to output_0\n", "INFO:acro:records:exception request was added to output_3\n", "INFO:acro:records:exception request was added to output_5\n", "INFO:acro:records:exception request was added to output_6\n" ] } ], "source": [ "acro.add_exception(\"output_0\", \"I really need this.\")\n", "acro.add_exception(\"output_3\", \"This one is safe. Trust me, I'm a professor.\")\n", "acro.add_exception(\"output_5\", \"It's not disclosive, I promise.\")\n", "acro.add_exception(\"output_6\", \"I need this one too\")" ] }, { "cell_type": "markdown", "id": "5a586694", "metadata": {}, "source": [ "### Finalise ACRO" ] }, { "cell_type": "code", "execution_count": 39, "id": "9e554eea", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_7\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(\n", "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "outcome: grant_type G N R R/G All\n", 
"year \n", "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok\n", "2011 ok ok ok threshold; ok\n", "2012 ok ok ok threshold; ok\n", "2013 ok ok ok threshold; ok\n", "2014 ok ok ok threshold; ok\n", "2015 ok ok ok threshold; ok\n", "All ok ok ok ok ok\n", "output: [grant_type G N R R/G All\n", "year \n", "2010 9921906.0 0.000000 8420373.0 11636000.0 8320154.5\n", "2011 8502246.0 125663.226562 7689140.0 16047500.0 5310392.0\n", "2012 11458580.0 131859.062500 6896304.0 16810000.0 5220580.5\n", "2013 13557147.0 150488.453125 7088095.5 16765625.0 5578657.0\n", "2014 13748147.0 135494.781250 8118565.5 17845750.0 6072600.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", "All 11412787.0 136158.859375 8006361.0 16648273.0 5968295.5]\n", "timestamp: 2025-03-06T19:43:34.695759\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " a reason should be provided\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_8\n", "status: review\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 10, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [[0, 2], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 1], [5, 2]], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\"mean\")\n", "summary: review; negative values found\n", "outcome: grant_type G N R R/G\n", "year \n", "2010 negative \n", "2011 negative negative \n", "2012 negative \n", "2013 negative negative \n", "2014 negative negative \n", "2015 negative negative \n", 
"output: [grant_type G N R R/G\n", "year \n", "2010 9921906.0 0.000000 8280032.5 11636000.0\n", "2011 8502246.0 123496.445312 7577703.5 16047500.0\n", "2012 11458580.0 131859.062500 6796357.5 16810000.0\n", "2013 13557147.0 147937.625000 6988263.0 16765625.0\n", "2014 13748147.0 133198.078125 7997392.0 17845750.0\n", "2015 11133433.0 146572.015625 10388612.0 18278624.0]\n", "timestamp: 2025-03-06T19:43:34.727227\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: review.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " negative values are valid financial losses\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_9\n", "status: fail\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[1, 0], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5]], 'p-ratio': [[1, 0], [3, 0]], 'nk-rule': [[3, 0]], 'all-values-are-same': []}}\n", "command: table = acro.pivot_table(\n", "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "outcome: inc_grants \\\n", "year 2010 2011 2012 \n", "grant_type \n", "G ok ok ok \n", "N threshold; p-ratio; ok ok \n", "R ok ok ok \n", "R/G threshold; p-ratio; nk-rule; threshold; threshold; \n", "All ok ok ok \n", "\n", " \n", "year 2013 2014 2015 All \n", "grant_type \n", "G ok ok ok ok \n", "N ok ok ok ok \n", "R ok ok ok ok \n", "R/G threshold; threshold; threshold; ok \n", "All ok ok ok ok \n", "output: [ inc_grants \\\n", "year 2010 2011 2012 2013 2014 \n", "grant_type \n", "G 138906688.0 127533696.0 171878704.0 203357200.0 206222208.0 \n", "N 0.0 7192804.0 7779685.0 8728330.0 7858697.0 \n", "R 504137056.0 
532464704.0 480105472.0 511361408.0 554594176.0 \n", "R/G 46544000.0 128380000.0 134480000.0 134125000.0 142766000.0 \n", "All 689587776.0 795571264.0 794243904.0 857571968.0 911441088.0 \n", "\n", " \n", "year 2015 All \n", "grant_type \n", "G 133601200.0 9.814997e+08 \n", "N 8501187.0 4.006070e+07 \n", "R 551457280.0 3.134120e+09 \n", "R/G 146228992.0 7.325240e+08 \n", "All 839788672.0 4.888204e+09 ]\n", "timestamp: 2025-03-06T19:43:34.782873\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " a reason should be provided\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_12\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: table = acro.pivot_table(\n", "summary: review; negative values found\n", "outcome: mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G \n", "N negative negative\n", "R negative negative\n", "R/G \n", "output: [ mean std\n", " inc_grants inc_grants\n", "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.341800e+05 1.990196e+05\n", "R 7.882230e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", "timestamp: 2025-03-06T19:43:34.872024\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: review.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " negative values are valid financial losses\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_17\n", 
"status: fail\n", "type: histogram\n", "properties: {'method': 'histogram'}\n", "sdc: {}\n", "command: hist = acro.hist(df, \"inc_grants\")\n", "summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. The maximum value of the inc_grants column is: 249327008.0\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: ['acro_artifacts/histogram_0.png']\n", "timestamp: 2025-03-06T19:43:35.149884\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " to be fair this is probably disclosive ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_18\n", "status: fail\n", "type: histogram\n", "properties: {'method': 'histogram'}\n", "sdc: {}\n", "command: hist = acro.hist(df, \"inc_grants\")\n", "summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. 
The maximum value of the inc_grants column is: 249327008.0\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: ['acro_artifacts/histogram_1.png']\n", "timestamp: 2025-03-06T19:43:35.204307\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " to be fair this is probably disclosive ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: pivot_table\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 1], [1, 0], [1, 1]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n", "command: acro.crosstab(mydata.year, mydata.survivor)\n", "summary: fail; threshold: 4 cells may need suppressing; \n", "outcome: survivor Dead_in_2015 Alive_in_2015\n", "year \n", "2010 threshold; threshold; \n", "2011 threshold; threshold; \n", "output: [survivor Dead in 2015 Alive in 2015\n", "year \n", "2010 2 2\n", "2011 2 2]\n", "timestamp: 2025-03-06T19:43:34.423743\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " to be fair this is probably disclosive ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:\n", "uid: output_19\n", "status: review\n", "type: custom\n", "properties: {}\n", "sdc: {}\n", "command: custom\n", "summary: review\n", "outcome: Empty DataFrame\n", "Columns: []\n", "Index: []\n", "output: ['XandY.jpeg']\n", "timestamp: 2025-03-06T19:43:35.258648\n", "comments: ['This output is an image showing the relationship between X 
and Y']\n", "exception: \n", "\n", "The status of the record above is: review.\n", "Please explain why an exception should be granted.\n", "\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ " please review- this image is not disclosive\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:acro:records:outputs written to: ACRO_RES\n" ] } ], "source": [ "SAVE_PATH = \"ACRO_RES\"\n", "\n", "# output = acro.finalise(SAVE_PATH, \"xlsx\")\n", "output = acro.finalise(SAVE_PATH, \"json\")" ] }, { "cell_type": "markdown", "id": "64e00920", "metadata": {}, "source": [ "### List files generated" ] }, { "cell_type": "code", "execution_count": 40, "id": "96b72072", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "XandY.jpeg\n", "config.json\n", "histogram_0.png\n", "histogram_1.png\n", "output_0_0.csv\n", "output_10_0.csv\n", "output_11_0.csv\n", "output_12_0.csv\n", "output_13_0.csv\n", "output_13_1.csv\n", "output_13_2.csv\n", "output_14_0.csv\n", "output_14_1.csv\n", "output_14_2.csv\n", "output_15_0.csv\n", "output_15_1.csv\n", "output_16_0.csv\n", "output_16_1.csv\n", "output_3_0.csv\n", "output_5_0.csv\n", "output_6_0.csv\n", "output_7_0.csv\n", "output_8_0.csv\n", "output_9_0.csv\n", "pivot_table_0.csv\n", "results.json\n" ] } ], "source": [ "# Names of the regular files directly inside SAVE_PATH, sorted by name.\n", "# Entries that are not regular files (e.g. sub-directories) are skipped.\n", "generated_files = sorted(\n", "    name\n", "    for name in os.listdir(SAVE_PATH)\n", "    if os.path.isfile(os.path.join(SAVE_PATH, name))\n", ")\n", "for filename in generated_files:\n", "    print(filename)" ] }, { "cell_type": "markdown", "id": "a0e77cbe", "metadata": {}, "source": [ "### Checksums" ] }, { "cell_type": "code", "execution_count": 38, "id": "f5f6364e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "XandY.jpeg.txt\n", "histogram_0.png.txt\n", "histogram_1.png.txt\n", "output_0_0.csv.txt\n", "output_10_0.csv.txt\n", "output_11_0.csv.txt\n", "output_12_0.csv.txt\n", "output_13_0.csv.txt\n", "output_13_1.csv.txt\n", "output_13_2.csv.txt\n", 
"output_14_0.csv.txt\n", "output_14_1.csv.txt\n", "output_14_2.csv.txt\n", "output_15_0.csv.txt\n", "output_15_1.csv.txt\n", "output_16_0.csv.txt\n", "output_16_1.csv.txt\n", "output_3_0.csv.txt\n", "output_5_0.csv.txt\n", "output_6_0.csv.txt\n", "output_7_0.csv.txt\n", "output_8_0.csv.txt\n", "output_9_0.csv.txt\n", "pivot_table_0.csv.txt\n", "results.json.txt\n" ] } ], "source": [ "# Names of the regular files inside the \"checksums\" sub-directory of SAVE_PATH,\n", "# sorted by name. Kept under its own variable name so it does not overwrite\n", "# the listing built for SAVE_PATH itself.\n", "checksum_dir = os.path.join(SAVE_PATH, \"checksums\")\n", "checksum_files = sorted(\n", "    name\n", "    for name in os.listdir(checksum_dir)\n", "    if os.path.isfile(os.path.join(checksum_dir, name))\n", ")\n", "for filename in checksum_files:\n", "    print(filename)" ] }, { "cell_type": "code", "execution_count": null, "id": "f241054a-c91e-4a91-bdc0-0395bbe084dd", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "testacro", "language": "python", "name": "testacro" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" } }, "nbformat": 4, "nbformat_minor": 5 }