{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "nbsphinx": "hidden"
   },
   "outputs": [],
   "source": [
    "# These lines are only for rendering in the docs, and are hidden through Jupyter tags\n",
    "# Do not run if you're running the notebook seperately\n",
    "\n",
    "import plotly.io as pio\n",
    "\n",
    "pio.renderers.default = \"notebook_connected\"\n",
    "\n",
    "import sys\n",
    "\n",
    "sys.path.append(\"../../../\")\n",
    "sys.path.append(\"../../python\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "nbsphinx": "hidden"
   },
   "outputs": [],
   "source": [
    "# If you're reading this: I'm hiding all of the code here in this top cell block \n",
    "# so it doesn't show up as ugly & large codeblocks in the rest of the doc.\n",
    "# For code articles & tutorials we want the code to be present, \n",
    "# but since this is an explainer we'd rather hide it. \n",
    "# I'm not aware of any good alternatives to hide input but not output, so this will do for now.\n",
    "\n",
    "import numpy as np\n",
    "import plotly.express as px\n",
    "import polars as pl\n",
    "from string import ascii_uppercase\n",
    "from scipy.stats import gaussian_kde\n",
    "\n",
    "settingsShowTSSpread=pl.DataFrame({\"n\": 5, \"positives\": range(1, 1001)}).with_columns(\n",
    "    p=[0.01, 0.05, 0.1]\n",
    ").explode(\"p\").with_columns(evidence=pl.col(\"positives\") / pl.col(\"p\"))\n",
    "\n",
    "def betaDistribution(structcol):\n",
    "    return structcol.map_elements(\n",
    "        lambda x: np.random.beta(\n",
    "            x[\"p\"] * x[\"evidence\"], (1 - x[\"p\"]) * x[\"evidence\"], x[\"n\"]\n",
    "        ).tolist(), return_dtype=pl.List(pl.Float32)\n",
    "    )\n",
    "\n",
    "\n",
    "thompsonSamplingSimulation = settingsShowTSSpread.with_columns(\n",
    "    sampled_propensity=betaDistribution(pl.struct([\"n\", \"p\", \"evidence\"]))\n",
    ").explode('sampled_propensity').with_columns(positives = pl.col('evidence')*pl.col('p'))\n",
    "\n",
    "\n",
    "# Spread of the Thompson Sampled propensities\n",
    "ThompsonSamplingSpread = px.scatter(\n",
    "    thompsonSamplingSimulation,\n",
    "    x=\"positives\",\n",
    "    y=\"sampled_propensity\",\n",
    "    color=\"p\",\n",
    "    opacity=0.6,\n",
    "    labels={\n",
    "        \"sampled_propensity\": \"Sampled Propensity\",\n",
    "        \"positives\": \"Number of positive responses in the Adaptive Model\",\n",
    "        \"p\":\"Propensity\"\n",
    "    },\n",
    "    range_y=[0, 0.2],\n",
    "    title='Thompson Sampling',\n",
    "    template=\"none\",\n",
    ").update_coloraxes(showscale=False).update_traces(marker={\"size\":3}).update_yaxes({'tickformat':',.0%', 'tickmode':'array', 'tickvals':[0, 0.01, 0.05, 0.1]})\n",
    "\n",
    "# Convergence of the Thompson Sampled propensities\n",
    "s = thompsonSamplingSimulation['positives']\n",
    "thompsonSamplingSimulation2 = thompsonSamplingSimulation.hstack([s.cut(breaks=np.array(range(int(s.min()), int(s.max())+20, 20))-1).rename(\"bin\")])\n",
    "s = thompsonSamplingSimulation2.group_by(\"p\", \"bin\").agg(\n",
    "    n=pl.len(),\n",
    "    n90=(((pl.col(\"sampled_propensity\") - pl.col(\"p\")) / pl.col(\"p\")) < 0.1).sum(),\n",
    "    positives=pl.min(\"positives\"),\n",
    ").with_columns(pct=pl.col('n90')/pl.col('n')).sort('p', 'bin').with_columns(pl.col('p').cast(pl.Utf8).cast(pl.Categorical))\n",
    "PercentageOfSampled = px.line(s, x='positives', y='pct', color='p', template='none', line_group='p', title='Percentage of Sampled Propensities <br><sup>that are within 10% of the Model Propensities</sup>', labels={'pct':'Percentage', 'positives':'Positive Responses in the Adaptive Model', 'p':'Propensity'}).update_layout(yaxis_tickformat='.0%')\n",
    "\n",
    "# Beta distribution\n",
    "settings1 = (\n",
    "    pl.DataFrame({\"n\": 100000})\n",
    "    .with_columns(\n",
    "        p=[0.01, 0.05, 0.1], evidence=[2000, 200, 100000], ypeak=[200, 50, 500])\n",
    "    .explode(\"p\", \"evidence\", 'ypeak')\n",
    ").with_columns(\n",
    "    sampled_propensity=betaDistribution(pl.struct([\"n\", \"p\", \"evidence\"]))\n",
    ").explode('sampled_propensity').with_columns(positives = pl.col('evidence')*pl.col('p'))\n",
    "\n",
    "results = {}\n",
    "for p, series in settings1.group_by('p'):\n",
    "    results[str(p)] = gaussian_kde(series['sampled_propensity'], 'silverman')(np.arange(0,0.15,0.0001))\n",
    "\n",
    "results_df = pl.DataFrame(results).with_columns(sampledPropensity=pl.Series(np.arange(0,0.15,0.0001)))\n",
    "\n",
    "# Convert to long format for plotly\n",
    "results_long = results_df.unpivot(\n",
    "    on=results_df.columns[:-1],\n",
    "    index=[\"sampledPropensity\"],\n",
    "    variable_name=\"variable\",\n",
    "    value_name=\"value\",\n",
    ")\n",
    "\n",
    "DistributionOfSampled = px.area(\n",
    "    results_long, \n",
    "    x=\"sampledPropensity\", \n",
    "    y=\"value\", \n",
    "    color=\"variable\",\n",
    "    title='Distribution of the sampled propensities<br><sup>for a few combinations of model propensity and evidence</sup>', \n",
    "    template='none', \n",
    "    labels={'value':'', 'sampledPropensity':'Sampled Propensity', 'variable':'Propensity'}\n",
    ").update_yaxes({'visible':True}).update_xaxes({'tickformat':',.0%', 'tickmode':'array', 'tickvals':[0, 0.01, 0.05, 0.1]}).update_layout(showlegend=False).update_traces({'line':{'width':0.0}})\n",
    "\n",
    "for annot in list(settings1.select('p', 'ypeak', 'positives').unique().iter_rows(named=True)):\n",
    "    DistributionOfSampled.add_annotation(x=annot['p'],y=annot['ypeak']+20, text = f\"Propensity = {annot['p']:.0%}<br>Positives = {annot['positives']}\", bgcolor='#FFFFFF', bordercolor='#000000', showarrow=False)\n",
    "\n",
    "# Outbound Model Maturity\n",
    "OutboundMaturity = px.line(\n",
    "    pl.DataFrame(\n",
    "        {\"Positives\": [0, 200, 300], \"modelMaturityBasedReach\": [0.02, 1.0, 1.0]}\n",
    "    ),\n",
    "    x=\"Positives\",\n",
    "    y=\"modelMaturityBasedReach\",\n",
    "    template=\"none\",\n",
    "    labels={\n",
    "        \"modelMaturityBasedReach\": \"% Selected from Eligible Customers\",\n",
    "        \"Positives\": \"Number of Positives in the Adaptive Model\",\n",
    "    },\n",
    ").update_layout(yaxis_tickformat='.0%')\n",
    "\n",
    "# Simulating Outbound Maturity\n",
    "nInitialActions = 5\n",
    "basePropensity = 0.01\n",
    "ncust = 100000\n",
    "\n",
    "letters = list(ascii_uppercase)\n",
    "def simulateActions(initialActions):\n",
    "    actions = initialActions.with_columns(\n",
    "        positives=0.0, modelMaturityBasedReach=0.0, expectedaccepts=0.0, impressions=0.0\n",
    "    )\n",
    "    for w in range(0, 11):\n",
    "        if w > 0:\n",
    "            # Create new action row\n",
    "            newWeek = pl.DataFrame(\n",
    "                {\n",
    "                    \"action\": letters[nInitialActions + w - 1],\n",
    "                    \"basepropensity\": basePropensity,\n",
    "                    \"evidence\": 0.0,\n",
    "                    \"week\": w,\n",
    "                    \"positives\": 0.0,\n",
    "                    \"modelMaturityBasedReach\": 0.0,\n",
    "                    \"expectedaccepts\": 0.0,\n",
    "                    \"impressions\": 0.0,\n",
    "                },\n",
    "                schema_overrides={\"week\": pl.UInt8},\n",
    "            )\n",
    "            # Add the new action to the previous week's data\n",
    "            newWeek = pl.concat(\n",
    "                [\n",
    "                    actions.filter(pl.col(\"week\") == w - 1).with_columns(\n",
    "                        week=pl.lit(w).cast(pl.UInt8)\n",
    "                    ),\n",
    "                    newWeek,\n",
    "                ]\n",
    "            )\n",
    "            # Add this new table to the main actions table\n",
    "            actions = pl.concat([actions, newWeek])\n",
    "\n",
    "        # Simulate new positives\n",
    "        actions = actions.with_columns(\n",
    "            positives=pl.col(\"evidence\") * pl.col(\"basepropensity\")\n",
    "        )\n",
    "\n",
    "        # Simulate maturity based reach\n",
    "        actions = actions.with_columns(\n",
    "            modelMaturityBasedReach=pl.when(pl.col(\"positives\") >= 200)\n",
    "            .then(1.0)\n",
    "            .otherwise(0.02 + (0.98 * (pl.col(\"positives\") / 200))),\n",
    "        )\n",
    "\n",
    "        # Calculate expected accepts and impressions for new week\n",
    "        actions = actions.with_columns(\n",
    "            [\n",
    "                pl.when(pl.col(\"week\") == w)\n",
    "                .then(\n",
    "                    pl.lit(ncust)\n",
    "                    * pl.col(\"modelMaturityBasedReach\")\n",
    "                    * pl.col(\"basepropensity\")\n",
    "                )\n",
    "                .otherwise(pl.col(\"expectedaccepts\"))\n",
    "                .alias(\"expectedaccepts\"),\n",
    "                pl.when(pl.col(\"week\") == w)\n",
    "                .then(pl.lit(ncust) * pl.col(\"modelMaturityBasedReach\"))\n",
    "                .otherwise(pl.col(\"impressions\"))\n",
    "                .alias(\"impressions\"),\n",
    "            ]\n",
    "        ).with_columns(\n",
    "            pl.when(pl.col(\"week\") == w)\n",
    "            .then(\n",
    "                pl.col(\"impressions\")\n",
    "                * pl.min_horizontal(\n",
    "                    [\n",
    "                        pl.lit(1.0),\n",
    "                        (\n",
    "                            pl.lit(ncust)\n",
    "                            / pl.col(\"impressions\").filter(pl.col(\"week\") == w).sum()\n",
    "                        ),\n",
    "                    ]\n",
    "                )\n",
    "            )\n",
    "            .otherwise(pl.col(\"impressions\"))\n",
    "            .alias(\"impressions\")\n",
    "        )\n",
    "\n",
    "        # Set evidence to the previous week's evidence + impressions\n",
    "        def previousEvidence():\n",
    "            return (pl.col(\"evidence\").shift(1).over([\"action\"])) + (\n",
    "                pl.col(\"impressions\").shift(1).over(\"action\")\n",
    "            )\n",
    "\n",
    "        if w > 0:\n",
    "            actions = actions.with_columns(\n",
    "                evidence=pl.when(previousEvidence().is_not_null())\n",
    "                .then(previousEvidence())\n",
    "                .otherwise(pl.col(\"evidence\"))\n",
    "            )\n",
    "\n",
    "    fig = px.area(\n",
    "        actions.sort(\"action\", descending=True),\n",
    "        x=\"week\",\n",
    "        y=\"impressions\",\n",
    "        color=\"action\",\n",
    "        template=\"none\",\n",
    "        title=\"Effect of Outbound Model Maturity <br><sup>when introducing weekly new actions</sup>\",\n",
    "        labels={\"impressions\": \"Number of Customers\"},\n",
    "    ).update_layout(legend={\"traceorder\": \"reversed\"})\n",
    "    scale = px.colors.sample_colorscale(\n",
    "        \"RdBu\", [n / (len(fig.data) - 1) for n in range(len(fig.data))]\n",
    "    )\n",
    "    for i in range(len(fig.data)):\n",
    "        fig.data[i][\"line\"][\"color\"] = scale[i]\n",
    "    return fig\n",
    "\n",
    "\n",
    "initialTab = pl.DataFrame(\n",
    "    {\n",
    "        \"action\": letters[0:nInitialActions],\n",
    "        \"basepropensity\": np.random.normal(\n",
    "            loc=basePropensity, scale=0.001, size=nInitialActions\n",
    "        ),\n",
    "        \"evidence\": np.floor(\n",
    "            np.random.uniform(size=nInitialActions, low=15000, high=40000)\n",
    "        ),\n",
    "        \"week\": 0,\n",
    "    },\n",
    "    schema_overrides={\"week\": pl.UInt8},\n",
    ")\n",
    "WithExistingActions = simulateActions(initialTab)\n",
    "\n",
    "\n",
    "initialTab = pl.DataFrame(\n",
    "    {\n",
    "        \"action\": letters[0:nInitialActions],\n",
    "        \"basepropensity\": np.random.normal(\n",
    "            loc=basePropensity, scale=0.001, size=nInitialActions\n",
    "        ),\n",
    "        \"evidence\": 0.0,\n",
    "        \"week\": 0,\n",
    "    },\n",
    "    schema_overrides={\"week\": pl.UInt8},\n",
    ")\n",
    "WithAllNewActions = simulateActions(initialTab)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Thompson Sampling\n",
    "\n",
    "Pega Customer Decision Hub uses a mechanism called Thompson Sampling, which is a method to sample the propensities from a distribution that centers around the model propensity. The width of the distribution depends on the evidence the model has seen. For new actions the propensities are random between 0 and 1. When the model gathers more evidence, the sampled propensities get closer to the model propensities.\n",
    "\n",
    "This mechanism helps in a few different ways. For outbound channels, when multiple new actions are launched, they will all have the \n",
    "same 0.5 propensity (that is where a new model starts). Thompson Sampling effectively adds a bit of noise so it is not always the same\n",
    "new action that gets selected. For both inbound and outbound channels, Thompson Sampling helps to add some extra \"exploration\" to the\n",
    "models in the early phases.\n",
    "\n",
    "See also:\n",
    "\n",
    "* https://en.wikipedia.org/wiki/Thompson_sampling\n",
    "* https://docs.pega.com/bundle/customer-decision-hub/page/customer-decision-hub/cdh-portal/actions-model-learning.html\n",
    "\n",
    "## Spread of the Thompson Sampled propensities\n",
    "\n",
    "The following plot shows how the spread of sampled propensities narrows with more responses\n",
    "received by the models. We show simulated results for a few different model propensities."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ThompsonSamplingSpread.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convergence of the Thompson Sampled propensities\n",
    "\n",
    "To illustrate the fact that the Thompson Sampled propensities quickly converge\n",
    "to the model propensities, below plot shows the percentage of sampled propensities\n",
    "that are within 10% of the original model propensity.\n",
    "\n",
    "For new models (no responses) that is around 50% but at about 200 positive\n",
    "responses, around 90% of the sampled propensities are within 10% of the \n",
    "original model propensities. After 500 positive responses almost all of the\n",
    "sampled propensities are within that range.\n",
    "\n",
    "The colors indicate the same base propensities as before, but as can be \n",
    "seen, the convergence rate is independent of the base propensities."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "PercentageOfSampled.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "## Beta distribution\n",
    "\n",
    "In principle, Thompson Sampling is based on the $\\beta$-distribution (https://en.wikipedia.org/wiki/Beta_distribution) with \"positive\" and \"negative\" responses as the shape parameters. \n",
    "\n",
    "But for practical reasons, instead of using $\\beta(positives, negatives)$, we approximate this with $\\beta(p*evidence, (1-p)*evidence)$.\n",
    "\n",
    "### Using Positives directly\n",
    "\n",
    "With https://agilestudio.pega.com/prweb/AgileStudio/app/agilestudio/bugs/BUG-803575 we will\n",
    "change the implementation in NBAD to use the positives directly so it will become $p + \\beta(positives, negatives) - \\frac{positives}{evidence}$ with $evidence = positives + negatives$.\n",
    "\n",
    "### Visualization of Thompson Sampling\n",
    "\n",
    "Here we show the distribution for a few combinations of model propensity and evidence. Note that the distributions\n",
    "do not have their top at exactly the model propensity. This is because the beta distribution gives values in the\n",
    "range 0..1 so for the (typical) small propensity values, the distribution is somewhat right-tailed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DistributionOfSampled.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Outbound Model Maturity\n",
    "\n",
    "Thompson Sampling is related to the concept of Outbound Model Maturity. In outbound\n",
    "channels, we limit the reach of new actions to avoid sending too many actions\n",
    "\"blindly\" to customers. As more feedback is accumulated, the percentage of\n",
    "the customers that can receive the new actions is increased.\n",
    "\n",
    "We start with 2% of the population for new actions and scale that linearly to\n",
    "100% until we reach a threshold of 200 positive responses. This is illustrated\n",
    "by the following plot:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "OutboundMaturity.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Suppose we start with 5 actions and introduce a new action every week. We\n",
    "receive feedback after a week, so the reach of the new\n",
    "actions ramps up after each week until it reaches the 200 positives and is then\n",
    "mixed with the other actions.\n",
    "\n",
    "The plot below illustrates how many customers (out of a hypothetical base\n",
    "of `100000`) get the different actions.\n",
    "\n",
    "Of course, in an actual implementation, there usually are more factors that are \n",
    "considered whether or not to give an action. This includes volume constraints,\n",
    "propensity thresholding and other factors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "WithExistingActions.show()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Effect on all new actions\n",
    "\n",
    "If at day zero all actions are new, it depends on the amount of available\n",
    "actions.\n",
    "\n",
    "If there are sufficient actions, then all customers may receive one, but\n",
    "the targeting will not be very personalized. The distribution will still look even.\n",
    "\n",
    "If there are few actions, then the model maturity capping may have the effect\n",
    "that initially not everyone will get an action. As the models mature and become\n",
    "more targeted, this will change."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "WithAllNewActions.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}