{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ACRO Demonstration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# uncomment this line if acro is not installed\n",
    "# ie you are in development mode\n",
    "# sys.path.insert(0, os.path.abspath(\"..\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from acro import ACRO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instantiate ACRO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:version: 0.4.8\n",
      "INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False, 'survival_safe_threshold': 10, 'zeros_are_disclosive': True}\n",
      "INFO:acro:automatic suppression: False\n"
     ]
    }
   ],
   "source": [
    "acro = ACRO(suppress=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load test data\n",
    "The dataset used in this notebook is the nursery dataset from OpenML.  \n",
    "- In this version, the data can be read directly from the local machine after it has been downloaded. \n",
    "- The code below reads the data from a folder called \"data\" which we assume is at the same level as the folder where you are working.\n",
    "- The path might need to be changed if the data has been downloaded and stored elsewhere.\n",
    " - for example use:  \n",
    "    path = os.path.join(\"data\", \"nursery.arff\")  \n",
    "    if the data is in a sub-folder of your work folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>parents</th>\n",
       "      <th>has_nurs</th>\n",
       "      <th>form</th>\n",
       "      <th>children</th>\n",
       "      <th>housing</th>\n",
       "      <th>finance</th>\n",
       "      <th>social</th>\n",
       "      <th>health</th>\n",
       "      <th>recommend</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>usual</td>\n",
       "      <td>proper</td>\n",
       "      <td>complete</td>\n",
       "      <td>1</td>\n",
       "      <td>convenient</td>\n",
       "      <td>convenient</td>\n",
       "      <td>nonprob</td>\n",
       "      <td>recommended</td>\n",
       "      <td>recommend</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>usual</td>\n",
       "      <td>proper</td>\n",
       "      <td>complete</td>\n",
       "      <td>1</td>\n",
       "      <td>convenient</td>\n",
       "      <td>convenient</td>\n",
       "      <td>nonprob</td>\n",
       "      <td>priority</td>\n",
       "      <td>priority</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>usual</td>\n",
       "      <td>proper</td>\n",
       "      <td>complete</td>\n",
       "      <td>1</td>\n",
       "      <td>convenient</td>\n",
       "      <td>convenient</td>\n",
       "      <td>nonprob</td>\n",
       "      <td>not_recom</td>\n",
       "      <td>not_recom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>usual</td>\n",
       "      <td>proper</td>\n",
       "      <td>complete</td>\n",
       "      <td>1</td>\n",
       "      <td>convenient</td>\n",
       "      <td>convenient</td>\n",
       "      <td>slightly_prob</td>\n",
       "      <td>recommended</td>\n",
       "      <td>recommend</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>usual</td>\n",
       "      <td>proper</td>\n",
       "      <td>complete</td>\n",
       "      <td>1</td>\n",
       "      <td>convenient</td>\n",
       "      <td>convenient</td>\n",
       "      <td>slightly_prob</td>\n",
       "      <td>priority</td>\n",
       "      <td>priority</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  parents has_nurs      form children     housing     finance         social  \\\n",
       "0   usual   proper  complete        1  convenient  convenient        nonprob   \n",
       "1   usual   proper  complete        1  convenient  convenient        nonprob   \n",
       "2   usual   proper  complete        1  convenient  convenient        nonprob   \n",
       "3   usual   proper  complete        1  convenient  convenient  slightly_prob   \n",
       "4   usual   proper  complete        1  convenient  convenient  slightly_prob   \n",
       "\n",
       "        health  recommend  \n",
       "0  recommended  recommend  \n",
       "1     priority   priority  \n",
       "2    not_recom  not_recom  \n",
       "3  recommended  recommend  \n",
       "4     priority   priority  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from scipy.io.arff import loadarff\n",
    "\n",
    "path = os.path.join(\"../data\", \"nursery.arff\")\n",
    "data = loadarff(path)\n",
    "df = pd.DataFrame(data[0])\n",
    "df = df.select_dtypes([object])\n",
    "df = df.stack().str.decode(\"utf-8\").unstack()\n",
    "df.rename(columns={\"class\": \"recommend\"}, inplace=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examples of producing tabular output\n",
    "We rely on the industry-standard package **pandas** for tabulating data.  \n",
    "In the next few examples we show:\n",
    "- first, how a researcher would normally make a call in pandas, saving the results in a variable that they can view on screen (or save to file?)\n",
    "- then how the call is identical in SACRO, except that:\n",
    "  - \"pd\" is replaced by \"acro\"\n",
    "  - the researcher immediately sees a copy of what the TRE output checker will see.\n",
    "  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pandas crosstab\n",
    "This is an example of crosstab using pandas.  \n",
    "We first make the call, then the second line print the outputs to screen."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "parents     great_pret  pretentious  usual\n",
      "recommend                                 \n",
      "not_recom         1440         1440   1440\n",
      "priority           858         1484   1924\n",
      "recommend            0            0      2\n",
      "spec_prior        2022         1264    758\n",
      "very_recom           0          132    196\n"
     ]
    }
   ],
   "source": [
    "table = pd.crosstab(df.recommend, df.parents)\n",
    "print(table)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ACRO crosstab\n",
    "- This is an example of crosstab using ACRO.  \n",
    "- The INFO lines show the researcher what will be reported to the output checkers.\n",
    "- Then the (suppressed as necessary) table is shown via the print command as before."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:get_summary(): fail; threshold: 4 cells may need suppressing; \n",
      "INFO:acro:outcome_df:\n",
      "--------------------------------------------------------|\n",
      "parents        |great_pret   |pretentious  |usual       |\n",
      "recommendation |             |             |            |\n",
      "--------------------------------------------------------|\n",
      "not_recom      |          ok |          ok |          ok|\n",
      "priority       |          ok |          ok |          ok|\n",
      "recommend      | threshold;  | threshold;  | threshold; |\n",
      "spec_prior     |          ok |          ok |          ok|\n",
      "very_recom     | threshold;  |          ok |          ok|\n",
      "--------------------------------------------------------|\n",
      "\n",
      "INFO:acro:records:add(): output_0\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "parents         great_pret  pretentious  usual\n",
      "recommendation                                \n",
      "not_recom             1440         1440   1440\n",
      "priority               858         1484   1924\n",
      "recommend                0            0      2\n",
      "spec_prior            2022         1264    758\n",
      "very_recom               0          132    196\n"
     ]
    }
   ],
   "source": [
    "safe_table = acro.crosstab(\n",
    "    df.recommend, df.parents, rownames=[\"recommendation\"], colnames=[\"parents\"]\n",
    ")\n",
    "print(safe_table)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ACRO crosstab with suppression\n",
    "- This is an example of crosstab with suppressing the cells that violate the disclosure tests.\n",
    "- Note that you need to change the value of the suppress variable in the acro object to True. Then run the crosstab command.  \n",
    "- If you wish to continue the research while suppressing the outputs, leave the suppress variable as it is, otherwise turn it off."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; \n",
      "INFO:acro:outcome_df:\n",
      "----------------------------------------------------|\n",
      "parents    |great_pret   |pretentious  |usual       |\n",
      "recommend  |             |             |            |\n",
      "----------------------------------------------------|\n",
      "not_recom  |          ok |          ok |          ok|\n",
      "priority   |          ok |          ok |          ok|\n",
      "recommend  | threshold;  | threshold;  | threshold; |\n",
      "spec_prior |          ok |          ok |          ok|\n",
      "very_recom | threshold;  |          ok |          ok|\n",
      "----------------------------------------------------|\n",
      "\n",
      "INFO:acro:records:add(): output_1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "parents     great_pret  pretentious   usual\n",
      "recommend                                  \n",
      "not_recom       1440.0       1440.0  1440.0\n",
      "priority         858.0       1484.0  1924.0\n",
      "recommend          NaN          NaN     NaN\n",
      "spec_prior      2022.0       1264.0   758.0\n",
      "very_recom         NaN        132.0   196.0\n"
     ]
    }
   ],
   "source": [
    "acro.suppress = True\n",
    "\n",
    "safe_table = acro.crosstab(df.recommend, df.parents)\n",
    "print(safe_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "acro.suppress = False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ACRO functionality to let users manage their outputs\n",
    "\n",
    "### 1: List current ACRO outputs\n",
    "This is an example of using the print_output function to list all the outputs created so far"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "uid: output_0\n",
      "status: fail\n",
      "type: table\n",
      "properties: {'method': 'crosstab'}\n",
      "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[2, 0], [2, 1], [2, 2], [4, 0]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n",
      "command: safe_table = acro.crosstab(\n",
      "summary: fail; threshold: 4 cells may need suppressing; \n",
      "outcome: parents          great_pret  pretentious        usual\n",
      "recommendation                                       \n",
      "not_recom                ok           ok           ok\n",
      "priority                 ok           ok           ok\n",
      "recommend       threshold;   threshold;   threshold; \n",
      "spec_prior               ok           ok           ok\n",
      "very_recom      threshold;            ok           ok\n",
      "output: [parents         great_pret  pretentious  usual\n",
      "recommendation                                \n",
      "not_recom             1440         1440   1440\n",
      "priority               858         1484   1924\n",
      "recommend                0            0      2\n",
      "spec_prior            2022         1264    758\n",
      "very_recom               0          132    196]\n",
      "timestamp: 2025-03-06T19:38:29.296719\n",
      "comments: []\n",
      "exception: \n",
      "\n",
      "uid: output_1\n",
      "status: fail\n",
      "type: table\n",
      "properties: {'method': 'crosstab'}\n",
      "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[2, 0], [2, 1], [2, 2], [4, 0]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n",
      "command: safe_table = acro.crosstab(df.recommend, df.parents)\n",
      "summary: fail; threshold: 4 cells suppressed; \n",
      "outcome: parents      great_pret  pretentious        usual\n",
      "recommend                                        \n",
      "not_recom            ok           ok           ok\n",
      "priority             ok           ok           ok\n",
      "recommend   threshold;   threshold;   threshold; \n",
      "spec_prior           ok           ok           ok\n",
      "very_recom  threshold;            ok           ok\n",
      "output: [parents     great_pret  pretentious   usual\n",
      "recommend                                  \n",
      "not_recom       1440.0       1440.0  1440.0\n",
      "priority         858.0       1484.0  1924.0\n",
      "recommend          NaN          NaN     NaN\n",
      "spec_prior      2022.0       1264.0   758.0\n",
      "very_recom         NaN        132.0   196.0]\n",
      "timestamp: 2025-03-06T19:38:29.315826\n",
      "comments: []\n",
      "exception: \n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"uid: output_0\\nstatus: fail\\ntype: table\\nproperties: {'method': 'crosstab'}\\nsdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[2, 0], [2, 1], [2, 2], [4, 0]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\\ncommand: safe_table = acro.crosstab(\\nsummary: fail; threshold: 4 cells may need suppressing; \\noutcome: parents          great_pret  pretentious        usual\\nrecommendation                                       \\nnot_recom                ok           ok           ok\\npriority                 ok           ok           ok\\nrecommend       threshold;   threshold;   threshold; \\nspec_prior               ok           ok           ok\\nvery_recom      threshold;            ok           ok\\noutput: [parents         great_pret  pretentious  usual\\nrecommendation                                \\nnot_recom             1440         1440   1440\\npriority               858         1484   1924\\nrecommend                0            0      2\\nspec_prior            2022         1264    758\\nvery_recom               0          132    196]\\ntimestamp: 2025-03-06T19:38:29.296719\\ncomments: []\\nexception: \\n\\nuid: output_1\\nstatus: fail\\ntype: table\\nproperties: {'method': 'crosstab'}\\nsdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[2, 0], [2, 1], [2, 2], [4, 0]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\\ncommand: safe_table = acro.crosstab(df.recommend, df.parents)\\nsummary: fail; threshold: 4 cells suppressed; \\noutcome: parents      great_pret  pretentious        usual\\nrecommend                                        \\nnot_recom            ok           ok           ok\\npriority             ok           ok           ok\\nrecommend   threshold;   threshold;   threshold; \\nspec_prior           ok           ok           ok\\nvery_recom  threshold;            ok           ok\\noutput: [parents     great_pret  pretentious   usual\\nrecommend                                  \\nnot_recom       1440.0       1440.0  1440.0\\npriority         858.0       1484.0  1924.0\\nrecommend          NaN          NaN     NaN\\nspec_prior      2022.0       1264.0   758.0\\nvery_recom         NaN        132.0   196.0]\\ntimestamp: 2025-03-06T19:38:29.315826\\ncomments: []\\nexception: \\n\\n\""
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "acro.print_outputs()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2: Remove some ACRO outputs before finalising \n",
    "This is an example of deleting some of the ACRO outputs.  \n",
    "The name of the output that needs to be removed should be passed to the function remove_output.  \n",
    "- The output name can be taken from the outputs listed by the print_outputs function, \n",
    "- or by listing the results and choosing the specific output that needs to be removed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:records:remove(): output_0 removed\n"
     ]
    }
   ],
   "source": [
    "acro.remove_output(\"output_0\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3: Rename ACRO outputs before finalising\n",
    "This is an example of renaming the outputs to provide a more descriptive name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:records:rename_output(): output_1 renamed to cross_tabulation\n"
     ]
    }
   ],
   "source": [
    "acro.rename_output(\"output_1\", \"cross_tabulation\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4: Add a comment to output\n",
    "This is an example to add a comment to outputs.  \n",
    "It can be used to provide a description or to pass additional information to the output checkers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:records:a comment was added to cross_tabulation\n"
     ]
    }
   ],
   "source": [
    "acro.add_comments(\"cross_tabulation\", \"Please let me have this data.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5: (the big one) Finalise ACRO\n",
    "This is an example of the function _finalise()_ which the users must call at the end of each session.  \n",
    "- It takes each output and saves it to a CSV file.    \n",
    "- It also saves the SDC analysis for each output to a json file or Excel file  \n",
    "  (depending on the extension of the name of the file provided as an input to the function)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:records:\n",
      "uid: cross_tabulation\n",
      "status: fail\n",
      "type: table\n",
      "properties: {'method': 'crosstab'}\n",
      "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[2, 0], [2, 1], [2, 2], [4, 0]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}\n",
      "command: safe_table = acro.crosstab(df.recommend, df.parents)\n",
      "summary: fail; threshold: 4 cells suppressed; \n",
      "outcome: parents      great_pret  pretentious        usual\n",
      "recommend                                        \n",
      "not_recom            ok           ok           ok\n",
      "priority             ok           ok           ok\n",
      "recommend   threshold;   threshold;   threshold; \n",
      "spec_prior           ok           ok           ok\n",
      "very_recom  threshold;            ok           ok\n",
      "output: [parents     great_pret  pretentious   usual\n",
      "recommend                                  \n",
      "not_recom       1440.0       1440.0  1440.0\n",
      "priority         858.0       1484.0  1924.0\n",
      "recommend          NaN          NaN     NaN\n",
      "spec_prior      2022.0       1264.0   758.0\n",
      "very_recom         NaN        132.0   196.0]\n",
      "timestamp: 2025-03-06T19:38:29.315826\n",
      "comments: ['Please let me have this data.']\n",
      "exception: \n",
      "\n",
      "The status of the record above is: fail.\n",
      "Please explain why an exception should be granted.\n",
      "\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " exception requested\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:acro:records:outputs written to: Examples\n"
     ]
    }
   ],
   "source": [
    "output = acro.finalise(\"Examples\", \"json\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "testacro",
   "language": "python",
   "name": "testacro"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}