|
448 | 448 | "source": [
|
449 | 449 | "# Save configs in list_exp_data_ (hacky variable name)\n",
|
450 | 450 | "import pickle\n",
|
451 |
| - "pik = \"mdpp_hydra_reward_scales_pickle.dat\"\n", |
| 451 | + "pik = \"mdpp_hydra_configs_pickle.dat\"\n", |
452 | 452 | "\n",
|
453 | 453 | "import os.path\n",
|
454 | 454 | "if not os.path.exists(pik):\n",
|
|
465 | 465 | "metadata": {},
|
466 | 466 | "outputs": [],
|
467 | 467 | "source": [
|
468 |
| - "# Save configs in list_exp_data_reward_scales\n", |
| 468 | + "# Save reward_scales in list_exp_data_reward_scales\n", |
469 | 469 | "import pickle\n",
|
470 | 470 | "pik = \"mdpp_hydra_reward_scales_pickle.dat\"\n",
|
471 | 471 | "\n",
|
|
529 | 529 | "del list_exp_data_reward_scales[259]"
|
530 | 530 | ]
|
531 | 531 | },
|
| 532 | + { |
| 533 | + "cell_type": "code", |
| 534 | + "execution_count": null, |
| 535 | + "metadata": {}, |
| 536 | + "outputs": [], |
| 537 | + "source": [ |
| 538 | + "contents = []\n", |
| 539 | + "for key in list_exp_data[0]:\n", |
| 540 | + " contents.append(key)\n", |
| 541 | + "print(contents)\n", |
| 542 | + "# print(list_exp_data[0]['train_stats'])\n", |
| 543 | + "print(len(list_exp_data[0]['dims_values']))\n", |
| 544 | + "\n", |
| 545 | + "print(len(list_exp_data_with_configs))\n", |
| 546 | + "print(list_exp_data_with_configs[0]['train_stats'].iloc[0,:])\n", |
| 547 | + "print(list_exp_data_with_configs[0]['train_stats'].iloc[1,:])\n", |
| 548 | + "\n", |
| 549 | + "# print(list_exp_data_with_configs[0]['train_stats']['learning_starts'])\n", |
| 550 | + "# learn_startss_mean = list_exp_data_with_configs[0]['train_stats']['learning_starts'].mean()\n", |
| 551 | + "# print(\"mean(learn_startss):\", learn_startss_mean)\n", |
| 552 | + "\n", |
| 553 | + "print(len(list_exp_data_reward_scales))\n", |
| 554 | + "# print(list_exp_data_reward_scales[0]['train_stats'])" |
| 555 | + ] |
| 556 | + }, |
532 | 557 | {
|
533 | 558 | "cell_type": "code",
|
534 | 559 | "execution_count": null,
|
|
587 | 612 | "top_configs = {}\n",
|
588 | 613 | "top_configs_mins = {}\n",
|
589 | 614 | "perfs_all_envs = {}\n",
|
| 615 | + "\n", |
| 616 | + "print(\"env x agent grid size:\", num_env_configs, num_agent_configs)\n", |
590 | 617 | "for perf_set in perf_sets:\n",
|
591 | 618 | " top_configs[perf_set] = []\n",
|
592 | 619 | " top_configs_mins[perf_set] = []\n",
|
593 | 620 | " perfs_all_envs[perf_set] = np.zeros(shape=(num_env_configs, num_agent_configs))\n",
|
594 | 621 | "\n",
|
595 | 622 | "corrs = {}\n",
|
| 623 | + "corrs_spm = {}\n", |
596 | 624 | "import itertools\n",
|
597 | 625 | "corr_sets = ['train', 'eval', 'train_auc', 'eval_auc']\n",
|
598 | 626 | "corr_combos = list(itertools.combinations(corr_sets, 2))\n",
|
599 | 627 | "\n",
|
600 | 628 | "# corr_sets = ['train_eval', 'train_auc_eval_auc', 'eval_eval_auc', 'train_eval_auc', 'train_train_auc', 'eval_train_auc']\n",
|
601 | 629 | "for corr_combo in corr_combos:\n",
|
602 | 630 | " corrs[corr_combo[0] + ' and ' + corr_combo[1]] = []\n",
|
603 |
| - "\n", |
| 631 | + " corrs_spm[corr_combo[0] + ' and ' + corr_combo[1]] = []\n", |
| 632 | + " \n", |
604 | 633 | "for i in range(num_env_configs):\n",
|
605 | 634 | "# if i == 259:\n",
|
606 | 635 | "# continue\n",
|
|
627 | 656 | " for combo in corr_combos:\n",
|
628 | 657 | " corr_ = prs(perfs[combo[0]], perfs[combo[1]])[0]\n",
|
629 | 658 | " corrs[combo[0] + ' and ' + combo[1]].append(corr_)\n",
|
| 659 | + " \n", |
| 660 | + " corr_ = spm(perfs[combo[0]], perfs[combo[1]])[0]\n", |
| 661 | + " corrs_spm[combo[0] + ' and ' + combo[1]].append(corr_)\n", |
| 662 | + " \n", |
630 | 663 | "\n",
|
631 | 664 | "# corrs['train_eval']\n",
|
632 | 665 | "# corrs['train_auc_eval_auc'].append(prs(perfs['train_auc'], perfs['eval_auc']))\n",
|
633 | 666 | "# corrs['eval_eval_auc'].append(prs(perfs['eval'], perfs['eval_auc']))\n",
|
634 | 667 | "# corrs['train_eval_auc'].append(prs(perfs['train'], perfs['eval_auc']))\n",
|
635 | 668 | "# corrs['train_train_auc'].append(prs(perfs['train'], perfs['train_auc']))\n",
|
636 | 669 | "# corrs['eval_train_auc'].append(prs(perfs['eval'], perfs['train_auc']))\n",
|
| 670 | + "\n", |
637 | 671 | "\n"
|
638 | 672 | ]
|
639 | 673 | },
|
|
734 | 768 | "\n",
|
735 | 769 | "\n",
|
736 | 770 | " print(\"Final portfolio:\", portfolio[perf_set])\n",
|
| 771 | + " print(\"Final portfolio perf.:\", np.sum(hydra_perfs[perf_set]))\n", |
| 772 | + " print(\"Oracle perf.:\", sum_over_maxes[perf_set][0])\n", |
737 | 773 | " print(\"Final portfolio mins:\", portfolio_mins[perf_set])\n",
|
738 | 774 | "\n",
|
739 | 775 | " import matplotlib.pyplot as plt\n",
|
|
745 | 781 | " plt.legend()\n",
|
746 | 782 | " plt.xlabel('Portfolio building iter.')\n",
|
747 | 783 | " plt.ylabel('Reward or number of configs.')\n",
|
| 784 | + " plt.yscale('log')\n", |
| 785 | + " plt.grid(which='both')\n", |
748 | 786 | " plt.show()\n",
|
749 | 787 | "\n",
|
750 | 788 | "# print(port_perfs_mins, sum(port_perfs_mins))\n",
|
|
760 | 798 | " plt.plot(sum_over_maxes[perf_set], label=\"Sum over maxes\")\n",
|
761 | 799 | " plt.plot(max_over_sums, label=\"Max over sums\")\n",
|
762 | 800 | " plt.legend()\n",
|
| 801 | + " plt.grid(which='both')\n", |
763 | 802 | " plt.show()\n",
|
764 | 803 | " \n",
|
765 | 804 | " \n",
|
|
775 | 814 | "# print(perfs_all_envs)\n",
|
776 | 815 | "for combo in corr_combos:\n",
|
777 | 816 | "# print(\"Corr. on \" + str(combo[0] + ' and ' + combo[1]), corrs[combo[0] + ' and ' + combo[1]])\n",
|
778 |
| - " print(\"Max corr. on \" + str(combo[0] + ' and ' + combo[1]), max(corrs[combo[0] + ' and ' + combo[1]]))\n", |
| 817 | + " print(\"Max (across envs) corr. on \" + str(combo[0] + ' and ' + combo[1]), max(corrs[combo[0] + ' and ' + combo[1]]))\n", |
779 | 818 | " print(\"Min corr. on \" + str(combo[0] + ' and ' + combo[1]), min(corrs[combo[0] + ' and ' + combo[1]]))\n",
|
| 819 | + " print(\"Max spm corr. on \" + str(combo[0] + ' and ' + combo[1]), max(corrs_spm[combo[0] + ' and ' + combo[1]]))\n", |
| 820 | + " print(\"Min spm corr. on \" + str(combo[0] + ' and ' + combo[1]), min(corrs_spm[combo[0] + ' and ' + combo[1]]))\n", |
| 821 | + " \n", |
780 | 822 | "# corrs[combo[0] + ' and ' + combo[1]]\n",
|
781 | 823 | "\n",
|
782 | 824 | "# for i in range(num_env_configs):\n",
|
783 | 825 | "# corrs[combo[0] + ' and ' + combo[1]]\n"
|
784 | 826 | ]
|
785 | 827 | },
|
| 828 | + { |
| 829 | + "cell_type": "code", |
| 830 | + "execution_count": null, |
| 831 | + "metadata": {}, |
| 832 | + "outputs": [], |
| 833 | + "source": [ |
| 834 | + "# Spearman correlation of agent configs on 100 random pairs of envs\n", |
| 835 | + "import random\n", |
| 836 | + "\n", |
| 837 | + "random.seed(0)\n", |
| 838 | + "\n", |
| 839 | + "# From https://stackoverflow.com/a/48581219/11063709\n", |
| 840 | + "n = 1000\n", |
| 841 | + "A = list(range(n))\n", |
| 842 | + "k = 2\n", |
| 843 | + "m = 100\n", |
| 844 | + "\n", |
| 845 | + "samples = set()\n", |
| 846 | + "tries = 0\n", |
| 847 | + "while len(samples) < m:\n", |
| 848 | + " samples.add(tuple(sorted(random.sample(A, k))))\n", |
| 849 | + " tries += 1\n", |
| 850 | + "\n", |
| 851 | + "samples = list(samples)\n", |
| 852 | + "# print(samples)\n", |
| 853 | + "# print(tries)\n", |
| 854 | + "\n", |
| 855 | + "corrs_spm_agents_on_envs = {}\n", |
| 856 | + "for perf_set in perf_sets:\n", |
| 857 | + " corrs_spm_agents_on_envs[perf_set] = []\n", |
| 858 | + "\n", |
| 859 | + "print(\"Spearman correlation of agent configs on 100 random pairs of envs:\")\n", |
| 860 | + "print(\"Mean, std, max, min\")\n", |
| 861 | + "for perf_set in perf_sets: \n", |
| 862 | + " for i in range(len(samples)):\n", |
| 863 | + "# print(perfs[perf_set])\n", |
| 864 | + " env_0_perfs = perfs_all_envs[perf_set][samples[i][0], :]\n", |
| 865 | + " env_1_perfs = perfs_all_envs[perf_set][samples[i][1], :]\n", |
| 866 | + " \n", |
| 867 | + " corr_spm = spm(env_0_perfs, env_1_perfs)[0]\n", |
| 868 | + " corrs_spm_agents_on_envs[perf_set].append(corr_spm)\n", |
| 869 | + " \n", |
| 870 | + "# print(corrs_spm_agents_on_envs[perf_set])\n", |
| 871 | + "\n", |
| 872 | + " print(perf_set, np.mean(corrs_spm_agents_on_envs[perf_set]), np.std(corrs_spm_agents_on_envs[perf_set]), np.max(corrs_spm_agents_on_envs[perf_set]), np.min(corrs_spm_agents_on_envs[perf_set]))" |
| 873 | + ] |
| 874 | + }, |
786 | 875 | {
|
787 | 876 | "cell_type": "code",
|
788 | 877 | "execution_count": null,
|
|
0 commit comments