Skip to content

Commit 03425c9

Browse files
committed
implement partition mode (single_bucket, less_bucket)
1 parent b1f0bcd commit 03425c9

File tree

10 files changed

+423
-36
lines changed

10 files changed

+423
-36
lines changed

storage/innobase/buf/buf0flu.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,12 @@ buf_flush_write_block_low(
10471047

10481048
/* Force the log to the disk before writing the modified block */
10491049
if (!srv_read_only_mode) {
1050+
#if defined (UNIV_PMEMOBJ_LOG) || defined (UNIV_PMEMOBJ_WAL)
1051+
//Since the log records are persisted in NVM, we don't need to follow the WAL rule
1052+
//Skip flush log here
1053+
#else //original
10501054
log_write_up_to(bpage->newest_modification, true);
1055+
#endif
10511056
}
10521057

10531058
switch (buf_page_get_state(bpage)) {

storage/innobase/fil/fil0fil.cc

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Created 10/25/1995 Heikki Tuuri
6161
//declare it at storage/innobase/srv/srv0start.cc
6262
extern PMEM_FILE_COLL* gb_pfc;
6363
#endif
64-
#if defined (UNIV_PMEMOBJ_LOG) || defined (UNIV_PMEMOBJ_DBW) || defined(UNIV_PMEMOBJ_BUF)
64+
#if defined (UNIV_PMEMOBJ_LOG) || defined (UNIV_PMEMOBJ_DBW) || defined(UNIV_PMEMOBJ_BUF) || defined (UNIV_PMEMOBJ_WAL)
6565
#include "my_pmem_common.h"
6666
#include "my_pmemobj.h"
6767
extern PMEM_WRAPPER* gb_pmw;
@@ -5890,6 +5890,9 @@ fil_io(
58905890
return(err);
58915891
}
58925892
#if defined (UNIV_PMEMOBJ_BUF)
5893+
/*
5894+
* pm_fil_io_batch original, without space_oriented sort
5895+
* */
58935896
dberr_t
58945897
pm_fil_io_batch(
58955898
const IORequest& type,
@@ -6127,6 +6130,148 @@ pm_fil_io_batch(
61276130
return DB_SUCCESS;
61286131

61296132
}
6133+
#if defined (UNIV_PMEMOBJ_BUF_PARTITION)
6134+
/*
6135+
*Collect information about mapping a space to a hashed list and store in local heap
6136+
Call this function every time writing a page form buffer pool to PMEM_BUF
6137+
* */
6138+
void
6139+
pm_filemap_update_items(
6140+
PMEM_BUF* buf,
6141+
page_id_t page_id,
6142+
int hashed_id,
6143+
uint64_t bucket_size) {
6144+
6145+
ulint i;
6146+
ulint j;
6147+
uint64_t cur_count;
6148+
uint64_t cur_size;
6149+
6150+
PMEM_FILE_MAP* fm = buf->filemap;
6151+
PMEM_FILE_MAP_ITEM* item;
6152+
6153+
cur_size = fm->size;
6154+
//scan the array, if the input page_id has space exist, increase count
6155+
for (i = 0; i < cur_size; i++) {
6156+
if (fm->items[i]->space_id == page_id.space()) {
6157+
item = fm->items[i];
6158+
cur_count = item->count;
6159+
6160+
//scan in the hashed_id array of the item
6161+
for (j = 0; j < cur_count; j++) {
6162+
if (item->hashed_ids[j] == hashed_id) {
6163+
//This hashed_id already counted
6164+
item->freqs[j]++;
6165+
break;
6166+
}
6167+
}
6168+
6169+
if (j == cur_count) {
6170+
//new hashed id count
6171+
item->hashed_ids[cur_count] = hashed_id;
6172+
item->count++;
6173+
}
6174+
break;
6175+
}
6176+
}
6177+
6178+
if (i == cur_size) {
6179+
//New item
6180+
item = static_cast<PMEM_FILE_MAP_ITEM*> (
6181+
malloc(sizeof(PMEM_FILE_MAP_ITEM)));
6182+
6183+
item->space_id = page_id.space();
6184+
item->count = 0;
6185+
6186+
mutex_enter(&fil_system->mutex);
6187+
6188+
fil_space_t* space = fil_space_get_by_id(item->space_id);
6189+
if( space != NULL) {
6190+
fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
6191+
//bool is_user_ts = fil_is_user_tablespace_id(item->space_id);
6192+
item->name = static_cast<char*> (
6193+
malloc(256));
6194+
strcpy(item->name, node->name);
6195+
}
6196+
6197+
mutex_exit(&fil_system->mutex);
6198+
6199+
item->hashed_ids = static_cast<int*> (
6200+
calloc(bucket_size, sizeof(int)));
6201+
6202+
item->freqs = static_cast<uint64_t*> (
6203+
calloc(bucket_size, sizeof(uint64_t)));
6204+
6205+
item->hashed_ids[item->count] = hashed_id;
6206+
item->freqs[item->count] = 1;
6207+
6208+
item->count++;
6209+
fm->items[cur_size] = item;
6210+
6211+
fm->size++;
6212+
printf("PMEM_PART add item space_id=%zu, name = %s\n", item->space_id, item->name);
6213+
}
6214+
//else
6215+
//This space_id - hashed_id map already count, does nothing
6216+
}
6217+
void
6218+
pm_filemap_close(PMEM_BUF* buf){
6219+
ulint i;
6220+
PMEM_FILE_MAP* fm = buf->filemap;
6221+
PMEM_FILE_MAP_ITEM* item;
6222+
6223+
6224+
for (i = 0; i < fm->size; i++) {
6225+
item = fm->items[i];
6226+
if (buf->filemap->items[i] != NULL) {
6227+
free(buf->filemap->items[i]->name);
6228+
free(buf->filemap->items[i]->hashed_ids);
6229+
buf->filemap->items[i]->hashed_ids = NULL;
6230+
6231+
free(buf->filemap->items[i]->freqs);
6232+
buf->filemap->items[i]->freqs = NULL;
6233+
6234+
free(buf->filemap->items[i]);
6235+
}
6236+
buf->filemap->items[i]=NULL;
6237+
}
6238+
6239+
if (buf->filemap->items != NULL){
6240+
free(buf->filemap->items);
6241+
buf->filemap->items = NULL;
6242+
}
6243+
6244+
free (buf->filemap);
6245+
buf->filemap = NULL;
6246+
}
6247+
void
6248+
pm_filemap_print(
6249+
PMEM_BUF* buf,
6250+
FILE* f){
6251+
ulint i;
6252+
ulint j;
6253+
PMEM_FILE_MAP* fm = buf->filemap;
6254+
PMEM_FILE_MAP_ITEM* item;
6255+
6256+
fprintf(f, "Number of spaces =%zu\n", fm->size);
6257+
6258+
6259+
for (i = 0; i < fm->size; i++) {
6260+
item = fm->items[i];
6261+
6262+
fprintf(f, "==== Space %zu sp_name %s apears on %zu hashed list: ( ", item->space_id, item->name, item->count);
6263+
//print a pair (hashed_id, freq) for each item
6264+
for (j = 0; j < item->count; j++) {
6265+
fprintf(f,"[%zu,%zu] ",
6266+
item->hashed_ids[j],
6267+
item->freqs[j]);
6268+
}
6269+
fprintf(f, " ) ======\n");
6270+
}
6271+
6272+
}
6273+
#endif //UNIV_PMEMOBJ_BUF_PARTITION
6274+
61306275
#endif //UNIV_PMEMOBJ_BUF
61316276

61326277
#ifndef UNIV_HOTBACKUP

storage/innobase/handler/ha_innodb.cc

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3706,25 +3706,18 @@ innobase_init(
37063706
srv_aio_n_slots_per_seg = 256;
37073707
}
37083708
#endif
3709-
#if defined(UNIV_PMEMOBJ_BUF)
3710-
if (!srv_pmem_buf_bucket_size) {
3711-
srv_pmem_buf_bucket_size = 256;
3712-
}
3713-
#endif
3714-
#if defined (UNIV_PMEMOBJ_BUF_FLUSHER)
3715-
if (!srv_pmem_n_flush_threads) {
3716-
srv_pmem_n_flush_threads = 8;
3717-
}
3718-
3719-
#endif
3720-
3721-
#if defined(UNIV_PMEMOBJ_BUF) || defined (UNIV_PMEMOBJ_DBW) || defined (UNIV_PMEMOBJ_LOG)
3709+
#if defined(UNIV_PMEMOBJ_BUF) || defined (UNIV_PMEMOBJ_DBW) || defined (UNIV_PMEMOBJ_LOG) || defined (UNIV_PMEMOBJ_WAL)
37223710
if (!srv_pmem_home_dir) {
37233711
srv_pmem_home_dir = (char*) "/mnt/pmem1";
37243712
}
37253713
if (!srv_pmem_pool_size) {
37263714
srv_pmem_pool_size = 8 * 1024; //8 GB
37273715
}
3716+
#endif
3717+
#if defined(UNIV_PMEMOBJ_BUF)
3718+
if (!srv_pmem_buf_bucket_size) {
3719+
srv_pmem_buf_bucket_size = 256;
3720+
}
37283721
if (!srv_pmem_buf_size) {
37293722
srv_pmem_buf_size = 4 * 1024 ; //4 GB
37303723
}
@@ -3734,7 +3727,20 @@ innobase_init(
37343727
if (!srv_pmem_buf_flush_pct) {
37353728
srv_pmem_buf_flush_pct = 0.9;
37363729
}
3737-
#endif
3730+
#endif
3731+
#if defined (UNIV_PMEMOBJ_BUF_FLUSHER)
3732+
if (!srv_pmem_n_flush_threads) {
3733+
srv_pmem_n_flush_threads = 8;
3734+
}
3735+
#endif
3736+
#if defined (UNIV_PMEMOBJ_BUF_PARTITION)
3737+
if (!srv_pmem_n_space_bits) {
3738+
srv_pmem_n_space_bits = 5;
3739+
}
3740+
if (!srv_pmem_page_per_bucket_bits) {
3741+
srv_pmem_page_per_bucket_bits = 8;
3742+
}
3743+
#endif
37383744
if (!srv_log_group_home_dir) {
37393745
srv_log_group_home_dir = default_path;
37403746
}
@@ -19506,6 +19512,16 @@ static MYSQL_SYSVAR_ULONG(pmem_n_flush_threads, srv_pmem_n_flush_threads,
1950619512

1950719513
#endif
1950819514

19515+
#if defined (UNIV_PMEMOBJ_BUF_PARTITION)
19516+
static MYSQL_SYSVAR_ULONG(pmem_n_space_bits, srv_pmem_n_space_bits,
19517+
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19518+
"Number of bits present a page_no in partition algorithm, from 1 to 32 (space_no is 4-bytes number), default is 5.",
19519+
NULL, NULL, 5, 1, 32,0);
19520+
static MYSQL_SYSVAR_ULONG(pmem_page_per_bucket_bits, srv_pmem_page_per_bucket_bits,
19521+
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19522+
"Number of bits present the maxmum number of pages per space in a bucket in partition algorithm, from 1 to log2(srv_pmem_buf_bucket_size), default is 10.",
19523+
NULL, NULL, 10, 1, 32, 0);
19524+
#endif
1950919525
#if defined (UNIV_PMEMOBJ_BUF) || defined (UNIV_PMEMOBJ_DBW) || defined (UNIV_PMEMOBJ_LOG)
1951019526
static MYSQL_SYSVAR_STR(pmem_home_dir, srv_pmem_home_dir,
1951119527
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -20344,6 +20360,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
2034420360
#if defined (UNIV_PMEMOBJ_BUF_FLUSHER)
2034520361
MYSQL_SYSVAR(pmem_n_flush_threads),
2034620362
#endif
20363+
#if defined (UNIV_PMEMOBJ_BUF_PARTITION)
20364+
MYSQL_SYSVAR(pmem_n_space_bits),
20365+
MYSQL_SYSVAR(pmem_page_per_bucket_bits),
20366+
#endif
2034720367
#if defined (UNIV_PMEMOBJ_BUF) || defined (UNIV_PMEMOBJ_DBW) || defined (UNIV_PMEMOBJ_LOG)
2034820368
MYSQL_SYSVAR(pmem_home_dir),
2034920369
MYSQL_SYSVAR(pmem_pool_size),

storage/innobase/include/log0log.ic

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ Created 12/9/1995 Heikki Tuuri
3333
#include "mtr0types.h"
3434
#endif /* UNIV_LOG_LSN_DEBUG */
3535

36-
#if defined(UNIV_PMEMOBJ_LOG)
37-
//This defined in log0log.cc
36+
//#if defined(UNIV_PMEMOBJ_LOG)
37+
////This defined in log0log.cc
3838
//#include "my_pmemobj.h"
3939
//extern PMEM_WRAPPER* gb_pmw;
40-
#endif
40+
//#endif
4141

4242
/************************************************************//**
4343
Gets a log block flush bit.
@@ -419,7 +419,7 @@ log_reserve_and_write_fast(
419419

420420
log_sys->lsn += len;
421421
//#if defined(UNIV_PMEMOBJ_LOG)
422-
// //Update lsn and buf_free
422+
//Update lsn and buf_free
423423
// gb_pmw->plogbuf->lsn = log_sys->lsn;
424424
// gb_pmw->plogbuf->buf_free = log_sys->buf_free;
425425
//#endif

storage/innobase/include/my_pmemobj.h

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,16 @@ typedef struct __pmem_flusher PMEM_FLUSHER;
7070

7171
struct __pmem_buf_bucket_stat;
7272
typedef struct __pmem_buf_bucket_stat PMEM_BUCKET_STAT;
73+
74+
struct __pmem_file_map_item;
75+
typedef struct __pmem_file_map_item PMEM_FILE_MAP_ITEM;
76+
77+
struct __pmem_file_map;
78+
typedef struct __pmem_file_map PMEM_FILE_MAP;
79+
80+
struct __pmem_sort_obj;
81+
typedef struct __pmem_sort_obj PMEM_SORT_OBJ;
82+
7383
#endif //UNIV_PMEMOBJ_BUF
7484

7585
POBJ_LAYOUT_BEGIN(my_pmemobj);
@@ -279,20 +289,71 @@ struct __pmem_buf {
279289
PMEM_AIO_PARAM** params_arr;
280290

281291
PMEM_FLUSHER* flusher;
292+
293+
PMEM_FILE_MAP* filemap;
294+
};
295+
296+
// PARTITION //////////////
297+
/*Map space id to hashed_id, for partition purpose
298+
* */
299+
/* One filemap entry: for a single tablespace, the hashed lists its pages
 * have been written to and how many times each. */
struct __pmem_file_map_item {
300+
uint32_t space_id; //tablespace id this entry describes
301+
char* name; //heap-allocated copy of the name of the tablespace's first file node
302+
303+
int* hashed_ids; //list of hashed ids this space appears on; the first `count` entries are in use
304+
uint64_t count; //number of hashed lists this space appears on
305+
306+
uint64_t* freqs; //freqs[i] is the number of times this space appeared on hashed_ids[i]
307+
};
308+
/* Collection of per-tablespace filemap items, owned through PMEM_BUF::filemap. */
struct __pmem_file_map {
309+
PMEMrwlock lock; //NOTE(review): presumably guards concurrent updates of the map; no use is visible here -- confirm
310+
311+
uint64_t max_size; //presumably the capacity of items[]; set outside this view -- TODO confirm
312+
uint64_t size; //number of entries of items[] currently in use
313+
PMEM_FILE_MAP_ITEM** items; //array of pointers to heap-allocated items; entries [0, size) are valid
282314
};
283315

316+
//this struct for space_oriented sort
317+
/* Helper record for the space-oriented sort. No use of it is visible here,
 * so the field notes below are assumptions to be confirmed. */
struct __pmem_sort_obj {
318+
uint32_t space_no; //presumably the tablespace number the blocks belong to -- TODO confirm
319+
320+
uint32_t n_blocks; //presumably the number of entries in block_indexes -- TODO confirm
321+
uint32_t* block_indexes; //presumably indexes of the blocks of this space -- TODO confirm
322+
};
323+
324+
void
325+
pm_filemap_init(
326+
PMEM_BUF* buf);
327+
void
328+
pm_filemap_close(PMEM_BUF* buf);
329+
330+
/*Update the page_id in the filemap
331+
*
332+
* */
333+
void
334+
pm_filemap_update_items(
335+
PMEM_BUF* buf,
336+
page_id_t page_id,
337+
int hashed_id,
338+
uint64_t bucket_size);
339+
340+
void
341+
pm_filemap_print(
342+
PMEM_BUF* buf,
343+
FILE* outfile);
344+
284345
#if defined(UNIV_PMEMOBJ_BUF_STAT)
285346
//statistic info about a bucket
286347
//Objects of those struct do not need in PMEM
287348
struct __pmem_buf_bucket_stat {
288349
PMEMrwlock lock;
289350

290-
uint64_t n_writes;
291-
uint64_t n_overwrites;
292-
uint64_t n_reads;
293-
uint64_t n_reads_flushing;
351+
uint64_t n_writes;/*number of writes on the bucket*/
352+
uint64_t n_overwrites;/*number of overwrites on the bucket*/
353+
uint64_t n_reads;/*number of reads on the list (both flushing and normal)*/
354+
uint64_t n_reads_flushing;/*number of reads on the on-flushing list*/
294355
uint64_t max_linked_lists;
295-
uint64_t n_flushed_lists;
356+
uint64_t n_flushed_lists; /*number of of flushes on the bucket*/
296357
};
297358

298359
#endif
@@ -488,6 +549,12 @@ void
488549
pm_buf_flush_list_cleaner_disabled_loop(void);
489550
#endif
490551

552+
ulint
553+
hash_f1(
554+
uint32_t space_no,
555+
uint32_t page_no,
556+
uint64_t n_buckets,
557+
uint64_t page_per_bucket_bits);
491558

492559
#define PMEM_BUF_LIST_INSERT(pop, list, entries, type, func, args) do {\
493560
POBJ_LIST_INSERT_NEW_HEAD(pop, &list.head, entries, sizeof(type), func, &args); \

0 commit comments

Comments
 (0)