diff --git a/.gitignore b/.gitignore index d7556d0e..024c1c54 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,6 @@ dmypy.json # Pyre type checker .pyre/ + +# PyCharm +.idea/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index bd24c247..7ac53e9f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,14 @@ [submodule "pybmix/src/bayesmix"] path = pybmix/core/pybmixcpp/bayesmix - url = https://github.com/bayesmix-dev/bayesmix.git + url = https://github.com/taguhiM/bayesmix.git depth = 1 update = merge [submodule "lib/pybind11"] path = lib/pybind11 depth = 1 url = https://github.com/pybind/pybind11.git +[submodule "lib/math"] + path = lib/math + depth = 1 + url = https://github.com/bayesmix-dev/math.git + ignore = dirty \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d0b208f..e0e4c663 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,4 +52,5 @@ target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${BAYESMIX_LINK_LIBRARIES}) target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) -add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) +add_custom_target(generate_protos ALL DEPENDS ${PROTO_PYS}) +add_custom_target(two_to_three ALL COMMAND ${CMAKE_CURRENT_LIST_DIR}/convert_proto.sh DEPENDS generate_protos) \ No newline at end of file diff --git a/convert_proto.sh b/convert_proto.sh new file mode 100755 index 00000000..d61a46ab --- /dev/null +++ b/convert_proto.sh @@ -0,0 +1,6 @@ +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )/.. + +# NEEDS TO BE SET MANUALLY +ENV_DIR=/home/taguhi/miniconda3/envs/pybmix3.9 + +$ENV_DIR/bin/2to3 --output-dir=$SCRIPT_DIR/pybmix/proto -W -n $SCRIPT_DIR/pybmix/proto diff --git a/docs/examples/test_run.py b/docs/examples/test_run.py new file mode 100644 index 00000000..1e57f607 --- /dev/null +++ b/docs/examples/test_run.py @@ -0,0 +1,48 @@ +import os +import sys +HERE = os.path.dirname(os.path.realpath(__file__)) +BUILD_DIR = os.path.join(HERE, "../../") +sys.path.insert(0, os.path.realpath(BUILD_DIR)) + +import numpy as np +import matplotlib.pyplot as plt +from pybmix.core.mixing import DirichletProcessMixing +from pybmix.core.hierarchy import PythonHierarchy +from pybmix.core.mixture_model import MixtureModel +np.random.seed(2021) + + +def sample_from_mixture(weights, means, sds, n_data): + n_comp = len(weights) + clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data) + return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc]) + + +y = sample_from_mixture(np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200) + + +plt.hist(y) +plt.show() + +mixing = DirichletProcessMixing(total_mass=5) +hierarchy = PythonHierarchy() +hierarchy.make_default_fixed_params(y, 2) +mixture = MixtureModel(mixing, hierarchy) + +mixture.run_mcmc(y, algorithm="Neal2", niter=2000, nburn=1000) + +from pybmix.estimators.density_estimator import DensityEstimator + +grid = np.linspace(-6, 6, 500) +dens_est = DensityEstimator(mixture) +densities = dens_est.estimate_density(grid) + +plt.hist(y, density=True) +plt.plot(grid, np.mean(densities, axis=0), lw=3, label="predictive density") +idxs = [5, 100, 300] + +for idx in idxs: + plt.plot(grid, densities[idx, :], "--", label="iteration: {0}".format(idx)) + plt.legend() + plt.show() + diff --git a/lib/pybind11 b/lib/pybind11 index 30eb39ed..a8b3ff30 160000 --- a/lib/pybind11 +++ b/lib/pybind11 @@ -1 +1 @@ -Subproject commit 30eb39ed79d1e2eeff15219ac00773034300a5e6 +Subproject commit a8b3ff30f9649459021adc80f98a945d3ac675a5 diff --git a/make_.sh b/make_.sh new file mode 100644 index 00000000..0ae2d7d2 --- /dev/null +++ b/make_.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +cd build/; +cmake ..; +make pybmixcpp; +make generate_protos; +make two_to_three; diff --git a/pybmix/core/CMakeLists.txt b/pybmix/core/CMakeLists.txt index e69de29b..42be0c75 100644 --- a/pybmix/core/CMakeLists.txt +++ b/pybmix/core/CMakeLists.txt @@ -0,0 +1,220 @@ +cmake_minimum_required(VERSION 3.14.0) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") +message("CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}") + +find_program(CCACHE_PROGRAM ccache) +if (CCACHE_PROGRAM) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") +endif() + +project(bayesmix) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_BUILD_TYPE Release) + +set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -ftree-vectorize") +set(CMAKE_FIND_PACKAGE_PREFER_CONFIG TRUE) + +# Require PkgConfig +find_package(PkgConfig REQUIRED) +find_package(OpenMP REQUIRED) +find_package(pybind11 REQUIRED) + +# TBB CMake integration +message(STATUS "Using math TBB") +# Define TBB_ROOT Folder +set(TBB_ROOT ${CMAKE_CURRENT_LIST_DIR}/pybmixcpp/bayesmix/lib/math/lib/tbb) +file(COPY ${CMAKE_CURRENT_LIST_DIR}/pybmixcpp/bayesmix/lib/math/lib/tbb_2019_U8/ DESTINATION ${TBB_ROOT}) +# Build TBB Library with CMake Integration +include(${TBB_ROOT}/cmake/TBBBuild.cmake) +list(APPEND MAKE_ARGS "tbb_build_dir=${TBB_ROOT}") +list(APPEND MAKE_ARGS "tbb_build_prefix=tbb") +tbb_build(TBB_ROOT ${TBB_ROOT} CONFIG_DIR TBB_DIR MAKE_ARGS ${MAKE_ARGS}) +# Require TBB library (for compile / link options) +find_package(TBB) + +# Check if Protobuf is present in system +find_package(Protobuf) +if (NOT Protobuf_FOUND AND NOT PROTOBUF_FOUND AND NOT TARGET protobuf::libprotobuf) + if (${CMAKE_VERSION} VERSION_LESS "3.20.0") + message(FATAL_ERROR + "Your cmake version is too old: either install a newer version (>=3.20)" + " or install google's protocol buffer (protobuf) library by hand.") + endif() + + include(FetchContent) + set(FETCHCONTENT_QUIET OFF) + set(FETCHCONTENT_UPDATES_DISCONNECTED ON) + set(BUILD_SHARED_LIBS OFF) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + set(BUILD_TESTING OFF) + + message(CHECK_START "Fetching Protobuf") + list(APPEND CMAKE_MESSAGE_INDENT " ") + + option(protobuf_BUILD_TESTS "" OFF) + set(protobuf_BUILD_EXPORT OFF) + set(protobuf_MSVC_STATIC_RUNTIME OFF) + FetchContent_Declare( + protobuf + GIT_REPOSITORY "https://github.com/protocolbuffers/protobuf.git" + GIT_TAG "v3.14.0" + GIT_SUBMODULES "" + SOURCE_SUBDIR cmake) + FetchContent_MakeAvailable(protobuf) + + list(POP_BACK CMAKE_MESSAGE_INDENT) + message(CHECK_PASS "fetched") + message("Protobuf_FOUND ${Protobuf_FOUND}") + message(" --> PROTOBUF LIB: ${PROTOBUF_LIBRARIES}") + message(" --> PROTOBUF INCLUDE: ${PROTOBUF_INCLUDE_DIRS}") + message(" --> PROTOBUF VERSION: ${Protobuf_VERSION}") + message(" --> PROTOBUF Found: ${Protobuf_FOUND}") + +endif() + +option(DISABLE_TESTS + "If tests should be compiled or no" OFF) +option(DISABLE_BENCHMARKS + "If benchmarks should be compiled or no" OFF) +option(DISABLE_DOCS + "If docs should be generated or no" OFF) +option(BUILD_RUN "" ON) + +# set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set(BASEPATH "${CMAKE_CURRENT_LIST_DIR}") +set(INCLUDE_PATHS + ${BASEPATH} + ${CMAKE_CURRENT_LIST_DIR}/pybmixcpp/bayesmix/lib/math + ${CMAKE_CURRENT_LIST_DIR}/pybmixcpp/bayesmix/lib/math/lib/boost_1.72.0 + ${CMAKE_CURRENT_LIST_DIR}/pybmixcpp/bayesmix/lib/math/lib/eigen_3.3.9 + # TBB already included + ${CMAKE_CURRENT_BINARY_DIR} + ${protobuf_SOURCE_DIR}/pybmixcpp/bayesmix/src + ${protobuf_SOURCE_DIR}/src + ) + + +set(LINK_LIBRARIES + pthread + protobuf::libprotobuf + TBB::tbb + OpenMP::OpenMP_CXX + pybind11::embed + pybind11::pybind11 + ) + +set(COMPILE_OPTIONS -D_REENTRANT -fPIC) + +file(GLOB ProtoFiles "${BASEPATH}/pybmixcpp/bayesmix/src/proto/*.proto") +set(PROTO_DIR proto) + +foreach(PROTO_FILE IN LISTS ProtoFiles) + message(STATUS "protoc proto(cc): ${PROTO_FILE}") + get_filename_component(PROTO_DIR ${PROTO_FILE} DIRECTORY) + get_filename_component(PROTO_NAME ${PROTO_FILE} NAME_WE) + set(PROTO_HDR ${CMAKE_CURRENT_BINARY_DIR}/${PROTO_NAME}.pb.h) + set(PROTO_SRC ${CMAKE_CURRENT_BINARY_DIR}/${PROTO_NAME}.pb.cc) + message(STATUS "protoc hdr: ${PROTO_HDR}") + message(STATUS "protoc src: ${PROTO_SRC}") + add_custom_command( + OUTPUT ${PROTO_SRC} ${PROTO_HDR} + COMMAND protobuf::protoc "--proto_path=${BASEPATH}/pybmixcpp/bayesmix/src/proto" + ${PROTO_DIRS} "--cpp_out=${PROJECT_BINARY_DIR}" ${PROTO_FILE} + DEPENDS ${PROTO_FILE} protobuf::protoc + COMMENT "Generate C++ protocol buffer for ${PROTO_FILE}" + VERBATIM) + list(APPEND PROTO_HDRS ${PROTO_HDR}) + list(APPEND PROTO_SRCS ${PROTO_SRC}) +endforeach() + +SET_SOURCE_FILES_PROPERTIES(${PROTO_SRCS} ${PROTO_HDRS} PROPERTIES GENERATED TRUE) + +get_directory_property(HAS_PARENT PARENT_DIRECTORY) +if(HAS_PARENT) + set(BAYESMIX_INCLUDE_PATHS ${INCLUDE_PATHS} PARENT_SCOPE) + set(BAYESMIX_LINK_LIBRARIES ${LINK_LIBRARIES} PARENT_SCOPE) + set(BAYESMIX_COMPILE_OPTIONS ${COMPILE_OPTIONS} PARENT_SCOPE) + set(PROTO_HEADERS ${PROTO_HDRS} PARENT_SCOPE) + set(PROTO_SOURCES ${PROTO_SRCS} PARENT_SCOPE) + set(ProtoFiles ${ProtoFiles} PARENT_SCOPE) +endif() + +# Build library object +add_library(bayesmix OBJECT) +target_sources(bayesmix PUBLIC ${PROTO_SRCS} ${PROTO_HDRS}) +add_subdirectory(pybmixcpp/bayesmix/src) +target_include_directories(bayesmix PUBLIC ${INCLUDE_PATHS}) +target_include_directories(bayesmix PUBLIC pybmixcpp/bayesmix) +target_link_libraries(bayesmix PUBLIC ${LINK_LIBRARIES}) +target_compile_options(bayesmix PUBLIC ${COMPILE_OPTIONS}) + +# Build static library +add_library(bayesmixlib $) + +if (BUILD_RUN) + # Build run executable + add_executable(run_mcmc $ pybmixcpp/run_mcmc.cc) + target_include_directories(run_mcmc PUBLIC ${INCLUDE_PATHS}) + target_include_directories(run_mcmc PUBLIC pybmixcpp/bayesmix/) + target_include_directories(run_mcmc PUBLIC pybmixcpp/bayesmix/src) + target_include_directories(run_mcmc PUBLIC build) + target_link_libraries(run_mcmc PUBLIC ${LINK_LIBRARIES}) + target_compile_options(run_mcmc PUBLIC ${COMPILE_OPTIONS}) +endif() + +add_subdirectory(pybmixcpp) + +if (NOT DISABLE_TESTS) + add_subdirectory(test) +endif() + +if (NOT DISABLE_BENCHMARKS) + add_subdirectory(benchmarks) +endif() + +if (NOT DISABLE_DOCS) + add_subdirectory(docs) +endif() + +if (NOT DISABLE_PLOTS) + include(FetchContent) + + set(FETCHCONTENT_QUIET OFF) + set(FETCHCONTENT_UPDATES_DISCONNECTED ON) + set(BUILD_SHARED_LIBS OFF) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + set(BUILD_TESTING OFF) + + message(CHECK_START "Fetching Matplotplusplus") + list(APPEND CMAKE_MESSAGE_INDENT " ") + + FetchContent_Declare(matplotplusplus + GIT_REPOSITORY https://github.com/alandefreitas/matplotplusplus + GIT_TAG origin/master # or whatever tag you want + ) + + FetchContent_GetProperties(matplotplusplus) + if(NOT matplotplusplus_POPULATED) + FetchContent_Populate(matplotplusplus) + add_subdirectory(${matplotplusplus_SOURCE_DIR} ${matplotplusplus_BINARY_DIR} + EXCLUDE_FROM_ALL) + endif() + # find_package(Matplot++ REQUIRED) + message("matplot " ${matplotplusplus_SOURCE_DIR}) + + add_executable(plot_mcmc $ pybmixcpp/plot_mcmc.cc + pybmixcpp/bayesmix/src/plots/plot_utils.h + pybmixcpp/bayesmix/src/plots/plot_utils.cc) + target_include_directories(plot_mcmc PUBLIC + ${INCLUDE_PATHS} ${matplotplusplus_SOURCE_DIR}/source) + target_include_directories(plot_mcmc PUBLIC pybmixcpp/bayesmix) + target_link_libraries(plot_mcmc PUBLIC ${LINK_LIBRARIES} matplot) + target_compile_options(plot_mcmc PUBLIC ${COMPILE_OPTIONS}) +endif() + +if (NOT DISABLE_EXAMPLES) + add_subdirectory(pybmixcpp/bayesmix/examples) +endif() diff --git a/pybmix/core/algo_python.asciipb b/pybmix/core/algo_python.asciipb new file mode 100644 index 00000000..d748858d --- /dev/null +++ b/pybmix/core/algo_python.asciipb @@ -0,0 +1,27 @@ +##### GENERIC SETTINGS FOR ALL ALGORITHMS ##### +# Algorithm ID string, e.g. "Neal2" +algo_id: "Neal3" + +# RNG initial seed: any nonnegative integer +rng_seed: 20201124 + +# Number of iterations of the algorithm +iterations: 1100 + +# Number of initial iterations discarded by the algorithm +burnin: 100 + +# Number of clusters in which data will be first initialized +# (NOTE: If you wish to initialize one datum per cluster, please write 0.) +# (NOTE: This value is ONLY used for initialization, and it may be overwritten +# by certain mixing objects, such as LogSBMixing. Please check a mixing's +# initialize() function to know for sure whether or not it will override this +# value.) +init_num_clusters: 3 + + +##### ALGORITHM-SPECIFIC SETTINGS ##### +# Neal8 number of auxiliary blocks +# (NOTE: 3 is the recommended value in most cases, please change it only if you +# know what you're doing.) +neal8_n_aux: 3 diff --git a/pybmix/core/fun.py b/pybmix/core/fun.py new file mode 100644 index 00000000..70c5399f --- /dev/null +++ b/pybmix/core/fun.py @@ -0,0 +1,79 @@ +import numpy as np +import scipy.stats as ss + + +def like_lpdf(x, state): + mean = state[0] + var = state[1] + return ss.norm.logpdf(x, mean, var) + + +def marg_lpdf(x, hypers): + mean = hypers[0] + var_scaling = hypers[1] + shape = hypers[2] + scale = hypers[3] + sig_n = np.sqrt(scale * (var_scaling + 1) / (shape * var_scaling)) + return ss.t.logpdf(x, 2 * shape, mean, sig_n) + + +def initialize_state(hypers): + mean = hypers[0] + # var_scaling = hypers[1] + shape = hypers[2] + scale = hypers[3] + return [mean, scale / (shape + 1)] + + +def draw(state, hypers, rng): + # s_mean = state[0] + s_var = state[1] + h_mean = hypers[0] + h_var_scaling = hypers[1] + h_shape = hypers[2] + h_scale = hypers[3] + r1 = ss.norm.rvs(h_mean, np.sqrt(s_var / h_var_scaling), random_state=rng) + r2 = 1 / (ss.gamma.rvs(h_shape, + random_state=rng) / h_scale) # Inverse gamma of shape=(h_shape) and rate=(1/h_scale) + return [r1, r2] + + +def compute_posterior_hypers(card, hypers, sum_stats): + data_sum = sum_stats[0] + data_sum_squares = sum_stats[1] + h_mean = hypers[0] + h_var_scaling = hypers[1] + h_shape = hypers[2] + h_scale = hypers[3] + if card == 0: + return hypers + post_hypers = [0] * len(hypers) + y_bar = data_sum / card + sstat = data_sum_squares - card * (y_bar ** 2) + post_hypers[0] = (h_var_scaling * h_mean + data_sum) / (h_var_scaling + card) # mean + post_hypers[1] = h_var_scaling + card # var_scaling + post_hypers[2] = h_shape + 0.5 * card # shape + num = 0.5 * h_var_scaling * card * ((y_bar - h_mean) ** 2) + denom = card + h_var_scaling + post_hypers[3] = h_scale + 0.5 * sstat + num / denom # scale + return post_hypers + + +def update_summary_statistics(x, add, sum_stats): + if not len(sum_stats): + sum_stats = [0, 0] + data_sum = sum_stats[0] + data_sum_squares = sum_stats[1] + if add: + data_sum += x[0] + data_sum_squares += x[0] ** 2 + else: + data_sum -= x[0] + data_sum_squares -= x[0] ** 2 + return [data_sum, data_sum_squares] + + +def clear_summary_statistics(sum_stats): + data_sum = 0 + data_sum_squares = 0 + return [data_sum, data_sum_squares] diff --git a/pybmix/core/hierarchy.py b/pybmix/core/hierarchy.py index 33ce1578..1b313190 100644 --- a/pybmix/core/hierarchy.py +++ b/pybmix/core/hierarchy.py @@ -29,7 +29,7 @@ def __init__(self, prior_params=None): raise ValueError( "expected 'prior_params' to be of instance [{0}]" "found {1} instead".format( - " ".join(get_oneof_types("prior", self.prior_params)), + " ".join(get_oneof_types("prior", self.prior_params)), type(prior_params))) def make_default_fixed_params(self, y, exp_num_clusters=5): @@ -63,3 +63,26 @@ class LinearModel(BaseHierarchy): ID = hierarchy_id.LinRegUni NAME = hierarchy_id.HierarchyId.Name(ID) pass + + +class PythonHierarchy(BaseHierarchy): + ID = hierarchy_id.Python + NAME = hierarchy_id.HierarchyId.Name(ID) + + def __init__(self, prior_params=None): + self.prior_params = hprior.PythonPrior() + if prior_params is not None: + success = set_oneof_field("prior", self.prior_params, prior_params) + if not success: + raise ValueError( + "expected 'prior_params' to be of instance [{0}]" + "found {1} instead".format( + " ".join(get_oneof_types("prior", self.prior_params)), + type(prior_params))) + + def make_default_fixed_params(self, y, exp_num_clusters=5): + + self.prior_params.values.data.append(np.mean(y)) + self.prior_params.values.data.append(3) + self.prior_params.values.data.append(np.var(y) / exp_num_clusters) + self.prior_params.values.data.append(0.01) diff --git a/pybmix/core/mixture_model.py b/pybmix/core/mixture_model.py index dfa55b3e..a00232fe 100644 --- a/pybmix/core/mixture_model.py +++ b/pybmix/core/mixture_model.py @@ -1,12 +1,18 @@ import logging import numpy as np +import os +import sys +HERE = os.path.dirname(os.path.realpath(__file__)) +BUILD_DIR = os.path.join(HERE, "../../build/") +sys.path.insert(0, os.path.realpath(BUILD_DIR)) + import pybmix.core.mixing as mix import pybmix.proto.algorithm_id_pb2 as algorithm_id from pybmix.core.hierarchy import BaseHierarchy from pybmix.core.chain import MCMCchain from pybmix.proto.algorithm_state_pb2 import AlgorithmState -from pybmix.core.pybmixcpp import AlgorithmWrapper, ostream_redirect +from pybmixcpp import AlgorithmWrapper, ostream_redirect MARGINAL_ALGORITHMS = ["Neal2", "Neal3", "Neal8"] CONDITIONAL_ALGORITHMS = ["BlockedGibbs"] diff --git a/pybmix/core/plot_python.sh b/pybmix/core/plot_python.sh new file mode 100755 index 00000000..50dbf9b8 --- /dev/null +++ b/pybmix/core/plot_python.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +build/plot_mcmc \ + --grid-file pybmixcpp/bayesmix/resources/tutorial/grid.csv \ + --dens-file pybmixcpp/bayesmix/resources/tutorial/out/density.csv \ + --dens-plot pybmixcpp/bayesmix/resources/tutorial/out/density.png \ + --n-cl-file pybmixcpp/bayesmix/resources/tutorial/out/numclust.csv \ + --n-cl-trace-plot pybmixcpp/bayesmix/resources/tutorial/out/traceplot.png \ + --n-cl-bar-plot pybmixcpp/bayesmix/resources/tutorial/out/nclus_barplot.png diff --git a/pybmix/core/pybmixcpp/CMakeLists.txt b/pybmix/core/pybmixcpp/CMakeLists.txt new file mode 100644 index 00000000..fb934e59 --- /dev/null +++ b/pybmix/core/pybmixcpp/CMakeLists.txt @@ -0,0 +1,7 @@ +target_sources(bayesmix + PUBLIC + python_hierarchy.h + python_hierarchy.cc + py_global.h + py_global.cc +) \ No newline at end of file diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index 98e8e624..cecbae40 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit 98e8e6244183c6d04c8f8667640918712589603e +Subproject commit cecbae40475410e10f17f8e6bc50ce296428e658 diff --git a/pybmix/core/pybmixcpp/includes.h b/pybmix/core/pybmixcpp/includes.h new file mode 100644 index 00000000..9d7bd483 --- /dev/null +++ b/pybmix/core/pybmixcpp/includes.h @@ -0,0 +1,30 @@ +#ifndef BAYESMIX_INCLUDES_2_H_ +#define BAYESMIX_INCLUDES_2_H_ + +#include "algorithm_params.pb.h" +#include "algorithms/blocked_gibbs_algorithm.h" +#include "algorithms/load_algorithms.h" +#include "algorithms/neal2_algorithm.h" +#include "algorithms/neal3_algorithm.h" +#include "algorithms/neal8_algorithm.h" +#include "collectors/file_collector.h" +#include "collectors/memory_collector.h" +#include "hierarchies/fa_hierarchy.h" +#include "hierarchies/lapnig_hierarchy.h" +#include "hierarchies/lin_reg_uni_hierarchy.h" +#include "hierarchies/nnig_hierarchy.h" +#include "hierarchies/nnw_hierarchy.h" +#include "mixings/dirichlet_mixing.h" +#include "mixings/load_mixings.h" +#include "mixings/logit_sb_mixing.h" +#include "mixings/mixture_finite_mixing.h" +#include "mixings/pityor_mixing.h" +#include "mixings/truncated_sb_mixing.h" +#include "runtime/factory.h" +#include "utils/cluster_utils.h" +#include "utils/eval_like.h" +#include "utils/io_utils.h" +#include "utils/proto_utils.h" +#include "load_hierarchies_2.h" + +#endif // BAYESMIX_INCLUDES_2_H_ diff --git a/pybmix/core/pybmixcpp/load_hierarchies_2.h b/pybmix/core/pybmixcpp/load_hierarchies_2.h new file mode 100644 index 00000000..46a858b9 --- /dev/null +++ b/pybmix/core/pybmixcpp/load_hierarchies_2.h @@ -0,0 +1,29 @@ +#ifndef BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_2_H_ +#define BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_2_H_ + +#include +#include + +#include "bayesmix/src/hierarchies/abstract_hierarchy.h" +#include "python_hierarchy.h" +#include "bayesmix/src/runtime/factory.h" + +//! Loads all available `Hierarchy` objects into the appropriate factory, so +//! that they are ready to be chosen and used at runtime. + +template +using Builder = std::function()>; + +using HierarchyFactory = Factory; + +__attribute__((constructor)) static void load_hierarchies_2() { + HierarchyFactory &factory = HierarchyFactory::Instance(); + + Builder Pythonbuilder = []() { + return std::make_shared(); + }; + + factory.add_builder(PythonHierarchy().get_id(), Pythonbuilder); +} + +#endif // BAYESMIX_HIERARCHIES_LOAD_HIERARCHIES_2_H_ diff --git a/pybmix/core/pybmixcpp/plot_mcmc.cc b/pybmix/core/pybmixcpp/plot_mcmc.cc new file mode 100644 index 00000000..213e0d6c --- /dev/null +++ b/pybmix/core/pybmixcpp/plot_mcmc.cc @@ -0,0 +1,120 @@ +#include + +#include "pybmixcpp/bayesmix/lib/argparse/argparse.h" +#include "src/plots/plot_utils.h" +#include "src/utils/io_utils.h" +#include "py_global.h" + +#define EMPTYSTR std::string("\"\"") + +int main(int argc, char const *argv[]) { + argparse::ArgumentParser args("bayesmix::plot"); + + args.add_argument("--grid-file") + .default_value(EMPTYSTR) + .help( + "Path to a .csv file containing the grid of points (one per row) " + "on which the log-density has been evaluated"); + + args.add_argument("--dens-file") + .default_value(EMPTYSTR) + .help( + "Path to a .csv file containing the evaluations of the log-density"); + + args.add_argument("--n-cl-file") + .default_value(EMPTYSTR) + .help( + "Path to a .csv file containing the number of clusters " + "(one per row) at each iteration"); + + args.add_argument("--dens-plot") + .default_value(EMPTYSTR) + .help("File to which to save the density plot"); + + args.add_argument("--n-cl-trace-plot") + .default_value(EMPTYSTR) + .help( + "File to which to save the traceplot of the number of clusters " + "in the MCMC chain"); + + args.add_argument("--n-cl-bar-plot") + .default_value(EMPTYSTR) + .help( + "File to which to save the barplot with the empirical distribution " + "of the number of clusters in the MCMC chain"); + + std::cout << "Running plot_mcmc.cc" << std::endl; + + try { + args.parse_args(argc, argv); + } catch (const std::runtime_error &err) { + std::cerr << err.what() << std::endl; + std::cerr << args; + std::exit(1); + } + + // Get other arguments + std::string ncl_file = args.get("--n-cl-file"); + std::string ncl_trace_plot = args.get("--n-cl-trace-plot"); + std::string ncl_bar_plot = args.get("--n-cl-bar-plot"); + + // TRACEPLOT OF NUMBER OF CLUSTERS + if (ncl_file != EMPTYSTR and ncl_trace_plot != EMPTYSTR) { + bayesmix::check_file_is_writeable(ncl_trace_plot); + Eigen::MatrixXd num_clus = bayesmix::read_eigen_matrix(ncl_file); + num_clus_trace(num_clus, ncl_trace_plot); + } + + // HISTOGRAM OF NUMBER OF CLUSTERS + if (ncl_file != EMPTYSTR and ncl_bar_plot != EMPTYSTR) { + bayesmix::check_file_is_writeable(ncl_bar_plot); + Eigen::MatrixXd num_clus = bayesmix::read_eigen_matrix(ncl_file); + num_clus_bar(num_clus, ncl_bar_plot); + } + + // DENSITY PLOT + std::string grid_file = args.get("--grid-file"); + std::string dens_file = args.get("--dens-file"); + std::string dens_plot = args.get("--dens-plot"); + + if (grid_file != EMPTYSTR and dens_file != EMPTYSTR and + dens_plot != EMPTYSTR) { + bayesmix::check_file_is_writeable(dens_plot); + + // Read relevant matrices + Eigen::MatrixXd grid = bayesmix::read_eigen_matrix(grid_file); + Eigen::MatrixXd dens = bayesmix::read_eigen_matrix(dens_file); + int dim = grid.cols(); + int n_points = grid.rows(); + int n_iters = dens.rows(); + + // Check that matrix dimensions are consistent + if (n_points != dens.cols()) { + std::stringstream msg; + msg << "Matrix dimensions are not consistent: rows of grid = " + << n_points + << " should be equal to columns of density = " << dens.cols(); + throw std::invalid_argument(msg.str()); + } + // Check that grid has appropriate dimensions + if (dim != 1 and dim != 2) { + std::cout << "Grid has dimension " << dim << ", its density will not " + << "be plotted" << std::endl; + } else { + // Go from log-densities to mean density + dens = dens.array().exp(); + Eigen::VectorXd mean_dens = dens.colwise().mean(); + + // Plot 1D density + if (dim == 1) { + density_plot_1d(grid, mean_dens, dens_plot); + } + // Plot 2D density + else { + density_plot_2d(grid, mean_dens, dens_plot, true); + } + } + } + + std::cout << "End of plot_mcmc.cc" << std::endl; +} diff --git a/pybmix/core/pybmixcpp/py_global.cc b/pybmix/core/pybmixcpp/py_global.cc new file mode 100644 index 00000000..30f012ec --- /dev/null +++ b/pybmix/core/pybmixcpp/py_global.cc @@ -0,0 +1,17 @@ +#include "py_global.h" + +namespace py_global{ + py::scoped_interpreter guard{}; + py::module_ numpy = py::module_::import("numpy"); + py::module_ fun = py::module_::import("fun"); + py::module_ numpy_random = py::module_::import("numpy.random"); + py::object py_engine = numpy_random.attr("MT19937")(); + py::object py_gen = numpy_random.attr("Generator")(py_engine); + py::object posterior_hypers_evaluator = fun.attr("compute_posterior_hypers"); + py::object like_lpdf_evaluator = fun.attr("like_lpdf"); + py::object marg_lpdf_evaluator = fun.attr("marg_lpdf"); + py::object initialize_state_evaluator = fun.attr("initialize_state"); + py::object draw_evaluator = fun.attr("draw"); + py::object update_summary_statistics_evaluator = fun.attr("update_summary_statistics"); + py::object clear_summary_statistics_evaluator = fun.attr("clear_summary_statistics"); +}; \ No newline at end of file diff --git a/pybmix/core/pybmixcpp/py_global.h b/pybmix/core/pybmixcpp/py_global.h new file mode 100644 index 00000000..24a991b3 --- /dev/null +++ b/pybmix/core/pybmixcpp/py_global.h @@ -0,0 +1,26 @@ +#ifndef PYBMIX_PY_GLOBAL_ +#define PYBMIX_PY_GLOBAL_ + +#include +#include + + +namespace py = pybind11; +using namespace py::literals; + +namespace py_global{ + extern py::module_ numpy; + extern py::module_ fun; + extern py::module_ numpy_random; + extern py::object py_engine; + extern py::object py_gen; + extern py::object posterior_hypers_evaluator; + extern py::object like_lpdf_evaluator; + extern py::object marg_lpdf_evaluator; + extern py::object initialize_state_evaluator; + extern py::object draw_evaluator; + extern py::object update_summary_statistics_evaluator; + extern py::object clear_summary_statistics_evaluator; +}; + +#endif \ No newline at end of file diff --git a/pybmix/core/pybmixcpp/python_hierarchy.cc b/pybmix/core/pybmixcpp/python_hierarchy.cc new file mode 100644 index 00000000..25b83498 --- /dev/null +++ b/pybmix/core/pybmixcpp/python_hierarchy.cc @@ -0,0 +1,201 @@ +#include "python_hierarchy.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "algorithm_state.pb.h" +#include "hierarchy_prior.pb.h" +#include "ls_state.pb.h" +#include "pybmixcpp/bayesmix/src/utils/rng.h" +#include "py_global.h" +#include "pybind11/stl.h" +#include "pybind11/eigen.h" + +void synchronize_cpp_to_py_state(const std::mt19937 &cpp_gen, + py::object &py_gen); + +void synchronize_py_to_cpp_state(std::mt19937 &cpp_gen, + const py::object &py_gen); + +std::vector list_to_vector(py::list &x); + +//! PYTHON +double PythonHierarchy::like_lpdf(const Eigen::RowVectorXd &datum) const { + double result = py_global::like_lpdf_evaluator(datum, state.generic_state).cast(); + return result; +} + +//! PYTHON +double PythonHierarchy::marg_lpdf(const Python::Hyperparams ¶ms, + const Eigen::RowVectorXd &datum) const { + double result = py_global::marg_lpdf_evaluator(datum, params.generic_hypers).cast(); + return result; +} + +//! PYTHON +void PythonHierarchy::initialize_state() { + py::list state_py = py_global::initialize_state_evaluator(hypers->generic_hypers); + state.generic_state = list_to_vector(state_py); +} + +//! C++ +void PythonHierarchy::initialize_hypers() { + if (prior->has_values()) { + // Set values + hypers->generic_hypers.clear(); + int size = prior->values().size(); + for (int i = 0; i < size; ++i) { + hypers->generic_hypers.push_back((prior->values().data())[i]); + } + } +} + +//! PYTHON +//! TODO: put in python +void PythonHierarchy::update_hypers( + const std::vector &states) { + auto &rng = bayesmix::Rng::Instance().get(); + if (prior->has_values()) return; +} + +//! PYTHON +Python::State PythonHierarchy::draw(const Python::Hyperparams ¶ms) { + Python::State out; + synchronize_cpp_to_py_state(bayesmix::Rng::Instance().get(), py_global::py_gen); + py::list draw_py = py_global::draw_evaluator(state.generic_state,params.generic_hypers,py_global::py_gen); + out.generic_state = list_to_vector(draw_py); + synchronize_py_to_cpp_state(bayesmix::Rng::Instance().get(), py_global::py_gen); + return out; +} + + +//! PYTHON +void PythonHierarchy::update_summary_statistics( + const Eigen::RowVectorXd &datum, const bool add) { + py::list sum_stats_py = py_global::update_summary_statistics_evaluator(datum,add,sum_stats); +// data_sum = sum_stats_py[0].cast(); +// data_sum_squares = sum_stats_py[1].cast(); + sum_stats = list_to_vector(sum_stats_py); +} + +//! PYTHON +void PythonHierarchy::clear_summary_statistics() { + py::list sum_stats_py = py_global::clear_summary_statistics_evaluator(sum_stats); +// data_sum = sum_stats_py[0].cast(); +// data_sum_squares = sum_stats_py[1].cast(); + sum_stats = list_to_vector(sum_stats_py); +} + +//! PYTHON +Python::Hyperparams PythonHierarchy::compute_posterior_hypers() const { + // Compute posterior hyperparameters + Python::Hyperparams post_params; + py::list post_params_py = py_global::posterior_hypers_evaluator(card,hypers->generic_hypers,sum_stats); + post_params.generic_hypers = list_to_vector(post_params_py); + return post_params; + } + + +//! C++ +void PythonHierarchy::set_state_from_proto( + const google::protobuf::Message &state_) { + auto &statecast = downcast_state(state_); + int size = statecast.general_state().size(); + std::vector aux_v{}; + for (int i = 0; i < size; ++i) { + aux_v.push_back((statecast.general_state().data())[i]); + } + state.generic_state = aux_v; + set_card(statecast.cardinality()); +} + +//! C++ +std::shared_ptr +PythonHierarchy::get_state_proto() const { + bayesmix::Vector state_; + state_.set_size(state.generic_state.size()); + *state_.mutable_data() = { + state.generic_state.data(), + state.generic_state.data() + state.generic_state.size()}; + auto out = std::make_shared(); + out->mutable_general_state()->CopyFrom(state_); + return out; +} + +//! C++ +void PythonHierarchy::set_hypers_from_proto( + const google::protobuf::Message &hypers_) { + auto &hyperscast = downcast_hypers(hypers_).python_state(); + int size = hyperscast.data().size(); + std::vector aux_v{}; + for (int i = 0; i < size; ++i) { + aux_v.push_back((hyperscast.data())[i]); + } + hypers->generic_hypers = aux_v; +} + +//! C++ +std::shared_ptr +PythonHierarchy::get_hypers_proto() const { + bayesmix::Vector hypers_; + hypers_.set_size(hypers->generic_hypers.size()); + *hypers_.mutable_data() = { + hypers->generic_hypers.data(), + hypers->generic_hypers.data() + hypers->generic_hypers.size()}; + auto out = std::make_shared(); + out->mutable_python_state()->CopyFrom(hypers_); + return out; +} + +void synchronize_cpp_to_py_state(const std::mt19937 &cpp_gen, + py::object &py_gen) { + std::stringstream state{}; + state << cpp_gen; + + std::string aux_string{}; + py::list state_list{}; + + for (unsigned int n = 0; n < 624; ++n) { + state >> aux_string; + state_list.append(std::stoul(aux_string)); + } + state >> aux_string; + unsigned int pos = std::stoul(aux_string); + + py::object array = py_global::numpy.attr("array")(state_list, "uint32"); + py::dict state_dict("key"_a = array, "pos"_a = pos); + py::dict d("bit_generator"_a = "MT19937", "state"_a = state_dict); + py_gen.attr("__setstate__")(d); +} + +void synchronize_py_to_cpp_state(std::mt19937 &cpp_gen, + const py::object &py_gen) { + py::object py_state = py_gen.attr("__getstate__")(); + py::object state_ = py_state["state"]["key"].attr("tolist")(); + auto pos_ = py_state["state"]["pos"].cast(); + + std::stringstream ss_state_; + for (auto elem: state_) { + ss_state_ << elem.cast() << " "; + } + ss_state_ << pos_; + ss_state_ >> cpp_gen; +} + + +std::vector list_to_vector(py::list &x) { + unsigned int size = x.size(); + std::vector v(size); + for (unsigned int i = 0; i < size; ++i) { + v[i] = x[i].cast(); + } + return v; +} \ No newline at end of file diff --git a/pybmix/core/pybmixcpp/python_hierarchy.h b/pybmix/core/pybmixcpp/python_hierarchy.h new file mode 100644 index 00000000..d632d1fc --- /dev/null +++ b/pybmix/core/pybmixcpp/python_hierarchy.h @@ -0,0 +1,135 @@ +#ifndef BAYESMIX_HIERARCHIES_PYTHON_HIERARCHY_H_ +#define BAYESMIX_HIERARCHIES_PYTHON_HIERARCHY_H_ + +#include +#include +#include + +#include +#include +#include + +#include "algorithm_state.pb.h" +#include "pybmixcpp/bayesmix/src/hierarchies/conjugate_hierarchy.h" +#include "hierarchy_id.pb.h" +#include "hierarchy_prior.pb.h" + +namespace py = pybind11; +using namespace py::literals; + +extern py::module_ fun; + +//! Conjugate Normal Normal-InverseGamma hierarchy for univariate data. + +//! This class represents a hierarchical model where data are distributed +//! according to a normal likelihood, the parameters of which have a +//! Normal-InverseGamma centering distribution. That is: +//! f(x_i|mu,sig) = N(mu,sig^2) +//! (mu,sig^2) ~ N-IG(mu0, lambda0, alpha0, beta0) +//! The state is composed of mean and variance. The state hyperparameters, +//! contained in the Hypers object, are (mu_0, lambda0, alpha0, beta0), all +//! scalar values. Note that this hierarchy is conjugate, thus the marginal +//! distribution is available in closed form. For more information, please +//! refer to parent classes: `AbstractHierarchy`, `BaseHierarchy`, and +//! `ConjugateHierarchy`. + +namespace Python { +//! Custom container for State values +struct State { + std::vector generic_state; +}; + +//! Custom container for Hyperparameters values +struct Hyperparams { + std::vector generic_hypers; +}; + +}; // namespace Python + +class PythonHierarchy + : public ConjugateHierarchy { + public: + PythonHierarchy() = default; + ~PythonHierarchy() = default; + +// py::object posterior_hypers_evaluator = fun.attr("compute_posterior_hypers"); + + + //! Updates hyperparameter values given a vector of cluster states + void update_hypers(const std::vector + &states) override; + + //! Updates state values using the given (prior or posterior) hyperparameters + Python::State draw(const Python::Hyperparams ¶ms); + + //! Resets summary statistics for this cluster + void clear_summary_statistics() override; + + //! Returns the Protobuf ID associated to this class + bayesmix::HierarchyId get_id() const override { + return bayesmix::HierarchyId::Python; + } + + //! Read and set state values from a given Protobuf message + void set_state_from_proto(const google::protobuf::Message &state_) override; + + //! Read and set hyperparameter values from a given Protobuf message + void set_hypers_from_proto( + const google::protobuf::Message &hypers_) override; + + //! Writes current state to a Protobuf message and return a shared_ptr + //! New hierarchies have to first modify the field 'oneof val' in the + //! AlgoritmState::ClusterState message by adding the appropriate type + std::shared_ptr get_state_proto() + const override; + + //! Writes current value of hyperparameters to a Protobuf message and + //! return a shared_ptr. + //! New hierarchies have to first modify the field 'oneof val' in the + //! AlgoritmState::HierarchyHypers message by adding the appropriate type + std::shared_ptr get_hypers_proto() + const override; + + //! Computes and return posterior hypers given data currently in this cluster + Python::Hyperparams compute_posterior_hypers() const; + + //! Returns whether the hierarchy models multivariate data or not + bool is_multivariate() const override { return false; } + + protected: + //! Evaluates the log-likelihood of data in a single point + //! @param datum Point which is to be evaluated + //! @return The evaluation of the lpdf + double like_lpdf(const Eigen::RowVectorXd &datum) const override; + + //! Evaluates the log-marginal distribution of data in a single point + //! @param params Container of (prior or posterior) hyperparameter values + //! @param datum Point which is to be evaluated + //! @return The evaluation of the lpdf + double marg_lpdf(const Python::Hyperparams ¶ms, + const Eigen::RowVectorXd &datum) const override; + + //! Updates cluster statistics when a datum is added or removed from it + //! @param datum Data point which is being added or removed + //! @param add Whether the datum is being added or removed + void update_summary_statistics(const Eigen::RowVectorXd &datum, + const bool add) override; + + //! Initializes state parameters to appropriate values + void initialize_state() override; + + //! Initializes hierarchy hyperparameters to appropriate values + void initialize_hypers() override; + +// //! Sum of data points currently belonging to the cluster +// double data_sum = 0; +// +// //! Sum of squared data points currently belonging to the cluster +// double data_sum_squares = 0; + + //! Vector of summary statistics + std::vector sum_stats; +}; + +#endif // BAYESMIX_HIERARCHIES_PYTHON_HIERARCHY_H_ diff --git a/pybmix/core/pybmixcpp/run_mcmc.cc b/pybmix/core/pybmixcpp/run_mcmc.cc new file mode 100644 index 00000000..bc4b33cf --- /dev/null +++ b/pybmix/core/pybmixcpp/run_mcmc.cc @@ -0,0 +1,266 @@ +#include + +#include +#include + +#include "pybmixcpp/bayesmix/lib/argparse/argparse.h" +#include "includes.h" +#include "py_global.h" + + +#define EMPTYSTR std::string("\"\"") + +bool check_args(const argparse::ArgumentParser &args) { + if (args["--coll-name"] != std::string("memory")) { + bayesmix::check_file_is_writeable(args.get("--coll-name")); + } + if (args["--dens-file"] != EMPTYSTR) { + bayesmix::check_file_is_writeable(args.get("--dens-file")); + } + if (args["--n-cl-file"] != EMPTYSTR) { + bayesmix::check_file_is_writeable(args.get("--n-cl-file")); + } + if (args["--clus-file"] != EMPTYSTR) { + bayesmix::check_file_is_writeable(args.get("--clus-file")); + } + if (args["--best-clus-file"] != EMPTYSTR) { + bayesmix::check_file_is_writeable( + args.get("--best-clus-file")); + } + + return true; +} + +int main(int argc, char *argv[]) { + // to check python interpreter is working + py::print("Hello from Python"); + + argparse::ArgumentParser args("bayesmix::run"); + + args.add_argument("--algo-params-file") + .required() + .help( + "asciipb file with the parameters of the algorithm, see " + "the file proto/algorithm_params.proto"); + + args.add_argument("--hier-type") + .required() + .help( + "enum string of the hierarchy, see the file " + "proto/hierarchy_id.proto"); + + args.add_argument("--hier-args") + .required() + .help( + "asciipb file with the parameters of the hierarchy, see " + "the file proto/hierarchy_prior.proto"); + + args.add_argument("--mix-type") + .required() + .help("enum string of the mixing, see the file proto/mixing_id.proto"); + + args.add_argument("--mix-args") + .required() + .help( + "asciipb file with the parameters of the mixing, see " + "the file proto/mixing_prior.proto"); + + args.add_argument("--coll-name") + .required() + .default_value("memory") + .help("If not 'memory', the path where to save the MCMC chains"); + + args.add_argument("--data-file") + .required() + .help("Path to a .csv file containing the observations (one per row)"); + + args.add_argument("--grid-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Path to a csv file containing a grid of points where to " + "evaluate the (log) predictive density"); + + args.add_argument("--dens-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Where to store the output of the (log) predictive " + "density"); + + args.add_argument("--n-cl-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Where to store the MCMC chain of the number of " + "clusters"); + + args.add_argument("--clus-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Where to store the MCMC chain of the cluster " + "allocations"); + + args.add_argument("--best-clus-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Where to store the best cluster allocation found by " + "minimizing the Binder loss function over the visited partitions"); + + args.add_argument("--hier-cov-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Only for dependent models. Path to a csv file with the " + "covariates used in the hierarchy"); + + args.add_argument("--hier-grid-cov-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Only for dependent models and when 'grid-file' is not " + "empty. " + "Path to a csv file with the values covariates used in the " + "hierarchy " + "on which to evaluate the (log) predictive density"); + + args.add_argument("--mix-cov-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Only for dependent models. Path to a csv file with the " + "covariates used in the mixing"); + + args.add_argument("--mix-grid-cov-file") + .default_value(EMPTYSTR) + .help( + "(Optional) Only for dependent models and when 'grid-file' is not " + "empty. " + "Path to a csv file with the values covariates used in the mixing " + "on which to evaluate the (log) predictive density"); + + std::cout << "Running run_mcmc.cc" << std::endl; + + try { + args.parse_args(argc, argv); + } catch (const std::runtime_error &err) { + std::cerr << err.what() << std::endl; + std::cerr << args; + std::exit(1); + } + check_args(args); + + // Read algorithm settings proto + bayesmix::AlgorithmParams algo_proto; + bayesmix::read_proto_from_file(args.get("--algo-params-file"), + &algo_proto); + + // Create factories and objects + auto &factory_algo = AlgorithmFactory::Instance(); + auto &factory_hier = HierarchyFactory::Instance(); + auto &factory_mixing = MixingFactory::Instance(); + auto algo = factory_algo.create_object(algo_proto.algo_id()); + auto hier = factory_hier.create_object(args.get("--hier-type")); + auto mixing = + factory_mixing.create_object(args.get("--mix-type")); + + BaseCollector *coll; + if (args["--coll-name"] == std::string("memory")) { + std::cout << "Creating MemoryCollector" << std::endl; + coll = new MemoryCollector(); + } else { + std::cout << "Creating FileCollector, writing to file: " + << args.get("--coll-name") << std::endl; + coll = new FileCollector(args.get("--coll-name")); + } + + bayesmix::read_proto_from_file(args.get("--mix-args"), + mixing->get_mutable_prior()); + bayesmix::read_proto_from_file(args.get("--hier-args"), + hier->get_mutable_prior()); + + // Read data matrices + Eigen::MatrixXd data = + bayesmix::read_eigen_matrix(args.get("--data-file")); + + // Set algorithm parameters + algo->read_params_from_proto(algo_proto); + + // Allocate objects in algorithm + algo->set_mixing(mixing); + algo->set_data(data); + algo->set_hierarchy(hier); + + // Read and set covariates + if (hier->is_dependent()) { + Eigen::MatrixXd hier_cov = + bayesmix::read_eigen_matrix(args.get("--hier-cov-file")); + algo->set_hier_covariates(hier_cov); + } + + if (mixing->is_dependent()) { + Eigen::MatrixXd mix_cov = + bayesmix::read_eigen_matrix(args.get("--mix-cov-file")); + algo->set_mix_covariates(mix_cov); + } + + // Run algorithm + algo->run(coll); + + if (args["--grid-file"] != EMPTYSTR && args["--dens-file"] != EMPTYSTR) { + Eigen::MatrixXd grid = + bayesmix::read_eigen_matrix(args.get("--grid-file")); + Eigen::MatrixXd hier_cov_grid = Eigen::RowVectorXd(0); + Eigen::MatrixXd mix_cov_grid = Eigen::RowVectorXd(0); + if (hier->is_dependent()) { + hier_cov_grid = bayesmix::read_eigen_matrix( + args.get("--hier-grid-cov-file")); + } + if (mixing->is_dependent()) { + mix_cov_grid = bayesmix::read_eigen_matrix( + args.get("--mix-grid-cov-file")); + } + + std::cout << "Computing log-density..." << std::endl; + Eigen::MatrixXd dens = bayesmix::eval_lpdf_parallel( + algo, coll, grid, hier_cov_grid, mix_cov_grid, false, 1); + bayesmix::write_matrix_to_file(dens, args.get("--dens-file")); + std::cout << "Successfully wrote density to " + << args.get("--dens-file") << std::endl; + } + + if (args["--n-cl-file"] != EMPTYSTR || args["--clus-file"] != EMPTYSTR || + args["--best-clus-file"] != EMPTYSTR) { + Eigen::MatrixXi clusterings(coll->get_size(), data.rows()); + Eigen::VectorXi num_clust(coll->get_size()); + for (int i = 0; i < coll->get_size(); i++) { + bayesmix::AlgorithmState state; + coll->get_next_state(&state); + for (int j = 0; j < data.rows(); j++) { + clusterings(i, j) = state.cluster_allocs(j); + } + num_clust(i) = state.cluster_states_size(); + } + coll->reset(); + + if (args["--n-cl-file"] != EMPTYSTR) { + bayesmix::write_matrix_to_file(num_clust, + args.get("--n-cl-file")); + std::cout << "Successfully wrote number of clusters to " + << args.get("--n-cl-file") << std::endl; + } + + if (args["--clus-file"] != EMPTYSTR) { + bayesmix::write_matrix_to_file(clusterings, + args.get("--clus-file")); + std::cout << "Successfully wrote cluster allocations to " + << args.get("--clus-file") << std::endl; + } + + if (args["--best-clus-file"] != EMPTYSTR) { + Eigen::VectorXi best_clus = bayesmix::cluster_estimate(clusterings); + bayesmix::write_matrix_to_file( + best_clus, args.get("--best-clus-file")); + std::cout << "Successfully wrote best cluster allocations to " + << args.get("--best-clus-file") << std::endl; + } + } + + std::cout << "End of run_mcmc.cc" << std::endl; + delete coll; + return 0; +} diff --git a/pybmix/core/pybmixcpp/tmp.hpp b/pybmix/core/pybmixcpp/tmp.hpp new file mode 100644 index 00000000..e69de29b diff --git a/pybmix/core/python.asciipb b/pybmix/core/python.asciipb new file mode 100644 index 00000000..f12859af --- /dev/null +++ b/pybmix/core/python.asciipb @@ -0,0 +1,7 @@ +values { + size: 4 + data: 5.5 + data: 0.2 + data: 1.5 + data: 2 +} diff --git a/pybmix/core/run_python.sh b/pybmix/core/run_python.sh new file mode 100755 index 00000000..04f65213 --- /dev/null +++ b/pybmix/core/run_python.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +build/run_mcmc \ + --algo-params-file algo_python.asciipb \ + --hier-type Python --hier-args python.asciipb \ + --mix-type DP --mix-args pybmixcpp/bayesmix/resources/tutorial/dp_gamma.asciipb \ + --coll-name pybmixcpp/bayesmix/resources/tutorial/out/chains.recordio \ + --data-file pybmixcpp/bayesmix/resources/tutorial/data.csv \ + --grid-file pybmixcpp/bayesmix/resources/tutorial/grid.csv \ + --dens-file pybmixcpp/bayesmix/resources/tutorial/out/density.csv \ + --n-cl-file pybmixcpp/bayesmix/resources/tutorial/out/numclust.csv \ + --clus-file pybmixcpp/bayesmix/resources/tutorial/out/clustering.csv \ + --best-clus-file pybmixcpp/bayesmix/resources/tutorial/out/best_clustering.csv diff --git a/pybmix/estimators/cluster_estimator.py b/pybmix/estimators/cluster_estimator.py index 8dadc719..67ee8452 100644 --- a/pybmix/estimators/cluster_estimator.py +++ b/pybmix/estimators/cluster_estimator.py @@ -1,7 +1,13 @@ import numpy as np +import os +import sys +HERE = os.path.dirname(os.path.realpath(__file__)) +BUILD_DIR = os.path.join(HERE, "../../build/") +sys.path.insert(0, os.path.realpath(BUILD_DIR)) + from pybmix.core.mixture_model import MixtureModel -from pybmix.core.pybmixcpp import _minbinder_cluster_estimate, ostream_redirect +from pybmixcpp import _minbinder_cluster_estimate, ostream_redirect class ClusterEstimator(object): """