forked from jhermes/twissjava
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcassandra.yaml
463 lines (423 loc) · 20 KB
/
cassandra.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
# Cassandra storage config YAML
#NOTE !!!!!!!! NOTE
# See http://wiki.apache.org/cassandra/StorageConfiguration for
# full explanations of configuration directives
#NOTE !!!!!!!! NOTE
# The name of the cluster. This is mainly used to prevent machines in
# one logical cluster from joining another.
cluster_name: 'Test Cluster'
# You should always specify initial_token when setting up a production
# cluster for the first time, and often when adding capacity later.
# The principle is that each node should be given an equal slice of
# the token ring; see http://wiki.apache.org/cassandra/Operations
# for more details.
#
# If blank (as it is here), Cassandra will request a token bisecting the
# range of the heaviest-loaded existing node. If there is no load
# information available, such as is the case with a new cluster, it will
# pick a random token, which will lead to hot spots.
initial_token:
# Set to true to make new [non-seed] nodes automatically migrate data
# to themselves from the pre-existing nodes in the cluster. Defaults
# to false because you can only bootstrap N machines at a time from
# an existing cluster of N, so if you are bringing up a cluster of
# 10 machines with 3 seeds you would have to do it in stages. Leaving
# this off for the initial start simplifies that.
auto_bootstrap: false
# Turns hinted handoff on or off (on in this configuration).
# See http://wiki.apache.org/cassandra/HintedHandoff
hinted_handoff_enabled: true
# authentication backend, implementing IAuthenticator; used to identify users
# NOTE(review): the AllowAll* class names suggest that no authentication or
# authorization is enforced -- confirm before exposing this node.
authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
# authorization backend, implementing IAuthority; used to limit access/provide permissions
authority: org.apache.cassandra.auth.AllowAllAuthority
# any IPartitioner may be used, including your own as long as it is on
# the classpath. Out of the box, Cassandra provides
#   org.apache.cassandra.dht.RandomPartitioner,
#   org.apache.cassandra.dht.ByteOrderedPartitioner,
#   org.apache.cassandra.dht.OrderPreservingPartitioner, and
#   org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
# (CollatingOPP collates according to EN,US rules, not naive byte
# ordering. Use this as an example if you need locale-aware collation.)
partitioner: org.apache.cassandra.dht.RandomPartitioner
# directories where Cassandra should store data on disk.
data_file_directories:
- /var/lib/cassandra/data
# directory for the commit log
commitlog_directory: /var/lib/cassandra/commitlog
# directory for saved caches
saved_caches_directory: /var/lib/cassandra/saved_caches
# Size to allow commitlog to grow to before creating a new segment
commitlog_rotation_threshold_in_mb: 128
# commitlog_sync may be either "periodic" or "batch."
# When in batch mode, Cassandra won't ack writes until the commit log
# has been fsynced to disk. It will wait up to
# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
# performing the sync.
commitlog_sync: periodic
# In "periodic" mode (the mode selected above), writes may be acked
# immediately and the CommitLog is simply synced every
# commitlog_sync_period_in_ms milliseconds.
commitlog_sync_period_in_ms: 10000
# Addresses of hosts that are deemed contact points.
# Cassandra nodes use this list of hosts to find each other and learn
# the topology of the ring. You must change this if you are running
# multiple nodes!
# The only seed listed here is the loopback address.
seeds:
- 127.0.0.1
# Access mode. mmapped i/o is substantially faster, but only practical on
# a 64bit machine (which notably does not include EC2 "small" instances)
# or relatively small datasets. "auto", the safe choice, will enable
# mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
# (which may allow you to get part of the benefits of mmap on a 32bit
# machine by mmapping only index files) and "standard".
# (The buffer size settings that follow only apply to standard,
# non-mmapped i/o.)
disk_access_mode: auto
# Unlike most systems, in Cassandra writes are faster than reads, so
# you can afford more of those in parallel. A good rule of thumb is 2
# concurrent reads per processor core. Increase concurrent_writes to
# the number of clients writing at once if you enable CommitLogSync +
# CommitLogSyncDelay.
concurrent_reads: 8
concurrent_writes: 32
# This sets the number of memtable flush writer threads. These will
# be blocked by disk io, and each one will hold a memtable in memory
# while blocked. If you have a large heap and many data directories,
# you can increase this value for better flush performance.
# By default this will be set to the number of data directories defined.
#memtable_flush_writers: 1
# Buffer size to use when performing contiguous column slices.
# Increase this to the size of the column slices you typically perform
sliced_buffer_size_in_kb: 64
# TCP port, for commands and data
storage_port: 7000
# Address to bind to and tell other Cassandra nodes to connect to. You
# _must_ change this if you want multiple nodes to be able to
# communicate!
#
# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
# will always do the Right Thing *if* the node is properly configured
# (hostname, name resolution, etc), and the Right Thing is to use the
# address associated with the hostname (it might not be).
#
# Setting this to 0.0.0.0 is always wrong.
listen_address: localhost
# The address to bind the Thrift RPC service to -- clients connect
# here. Unlike listen_address above, you *can* specify 0.0.0.0 here if
# you want Thrift to listen on all interfaces.
#
# Leaving this blank has the same effect it does for listen_address,
# (i.e. it will be based on the configured hostname of the node).
rpc_address: localhost
# port for Thrift to listen for clients on
rpc_port: 9160
# enable or disable keepalive on rpc connections
rpc_keepalive: true
# uncomment to set socket buffer sizes on rpc connections
# rpc_send_buff_size_in_bytes:
# rpc_recv_buff_size_in_bytes:
# Frame size for thrift (maximum field length).
# 0 disables TFramedTransport in favor of TSocket; that unframed option
# is deprecated, and we strongly recommend using Framed mode.
thrift_framed_transport_size_in_mb: 15
# The max length of a thrift message, including all fields and
# internal thrift overhead.
thrift_max_message_length_in_mb: 16
# Whether or not to take a snapshot before each compaction. Be
# careful using this option, since Cassandra won't clean up the
# snapshots for you. Mostly useful if you're paranoid when there
# is a data format change.
snapshot_before_compaction: false
# change this to increase the compaction thread's priority. In java, 1 is the
# lowest priority and that is our default.
# compaction_thread_priority: 1
# The threshold size in megabytes the binary memtable must grow to,
# before it's submitted for flushing to disk.
binary_memtable_throughput_in_mb: 256
# Add column indexes to a row after its contents reach this size.
# Increase if your column values are large, or if you have a very large
# number of columns. The competing concerns are: Cassandra has to
# deserialize this much of the row to read a single column, so you want
# it to be small - at least if you do many partial-row reads - but all
# the index data is read for each access, so you don't want to generate
# that wastefully either.
column_index_size_in_kb: 64
# Size limit for rows being compacted in memory. Larger rows will spill
# over to disk and use a slower two-pass compaction process. A message
# will be logged specifying the row key.
in_memory_compaction_limit_in_mb: 64
# Time to wait for a reply from other nodes before failing the command
rpc_timeout_in_ms: 10000
# phi value that must be reached for a host to be marked down.
# most users should never need to adjust this.
# phi_convict_threshold: 8
# endpoint_snitch -- Set this to a class that implements
# IEndpointSnitch, which will let Cassandra know enough
# about your network topology to route requests efficiently.
# Out of the box, Cassandra provides
#  - org.apache.cassandra.locator.SimpleSnitch:
#    Treats Strategy order as proximity. This improves cache locality
#    when disabling read repair, which can further improve throughput.
#  - org.apache.cassandra.locator.RackInferringSnitch:
#    Proximity is determined by rack and data center, which are
#    assumed to correspond to the 3rd and 2nd octet of each node's
#    IP address, respectively
#  - org.apache.cassandra.locator.PropertyFileSnitch:
#    Proximity is determined by rack and data center, which are
#    explicitly configured in cassandra-rack.properties.
endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
# dynamic_snitch -- This boolean controls whether the above snitch is
# wrapped with a dynamic snitch, which will monitor read latencies
# and avoid reading from hosts that have slowed (due to compaction,
# for instance)
dynamic_snitch: true
# controls how often to perform the more expensive part of host score
# calculation
dynamic_snitch_update_interval_in_ms: 100
# controls how often to reset all host scores, allowing a bad host to
# possibly recover
dynamic_snitch_reset_interval_in_ms: 600000
# if set greater than zero and read_repair_chance is < 1.0, this will allow
# 'pinning' of replicas to hosts in order to increase cache capacity.
# The badness threshold will control how much worse the pinned host has to be
# before the dynamic snitch will prefer other replicas over it. This is
# expressed as a double which represents a percentage. Thus, a value of
# 0.2 means Cassandra would continue to prefer the static snitch values
# until the pinned host was 20% worse than the fastest.
dynamic_snitch_badness_threshold: 0.0
# request_scheduler -- Set this to a class that implements
# RequestScheduler, which will schedule incoming client requests
# according to the specific policy. This is useful for multi-tenancy
# with a single Cassandra cluster.
# NOTE: This is specifically for requests from the client and does
# not affect inter node communication.
#  - org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
#  - org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
#    client requests to a node with a separate queue for each
#    request_scheduler_id. The scheduler is further customized by
#    request_scheduler_options as described below.
request_scheduler: org.apache.cassandra.scheduler.NoScheduler
# Scheduler Options vary based on the type of scheduler
# NoScheduler - Has no options
# RoundRobin
#  - throttle_limit -- The throttle_limit is the number of in-flight
#                      requests per client. Requests beyond
#                      that limit are queued up until
#                      running requests can complete.
#                      The value of 80 here is twice the number of
#                      concurrent_reads + concurrent_writes.
#  - default_weight -- default_weight is optional and allows for
#                      overriding the default which is 1.
#  - weights -- Weights are optional and will default to 1 or the
#               overridden default_weight. The weight translates into how
#               many requests are handled during each turn of the
#               RoundRobin, based on the scheduler id.
#
# request_scheduler_options:
#    throttle_limit: 80
#    default_weight: 5
#    weights:
#      Keyspace1: 1
#      Keyspace2: 5
# request_scheduler_id -- An identifier based on which to perform
# the request scheduling. Currently the only valid option is keyspace.
# request_scheduler_id: keyspace
# The Index Interval determines how large the sampling of row keys
# is for a given SSTable. The larger the sampling, the more effective
# the index is at the cost of space.
# NOTE(review): presumably a smaller interval yields a larger sampling
# (and a larger interval a smaller one) -- confirm before tuning.
index_interval: 128
# A ColumnFamily is the Cassandra concept closest to a relational table.
#
# Keyspaces are separate groups of ColumnFamilies. Except in very
# unusual circumstances you will have one Keyspace per application.
#
# Keyspace required parameters:
# - name: name of the keyspace; "system" and "definitions" are
# reserved for Cassandra Internals.
# - replica_placement_strategy: the class that determines how replicas
# are distributed among nodes. Contains both the class as well as
# configuration information. Must extend AbstractReplicationStrategy.
# Out of the box, Cassandra provides
# * org.apache.cassandra.locator.SimpleStrategy
# * org.apache.cassandra.locator.NetworkTopologyStrategy
# * org.apache.cassandra.locator.OldNetworkTopologyStrategy
#
# SimpleStrategy merely places the first
# replica at the node whose token is closest to the key (as determined
# by the Partitioner), and additional replicas on subsequent nodes
# along the ring in increasing Token order.
#
# With NetworkTopologyStrategy,
# for each datacenter, you can specify how many replicas you want
# on a per-keyspace basis. Replicas are placed on different racks
# within each DC, if possible. This strategy also requires rack aware
# snitch, such as RackInferringSnitch or PropertyFileSnitch.
# An example:
#      - name: Keyspace1
#        replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy
#        strategy_options:
#          DC1 : 3
#          DC2 : 2
#          DC3 : 1
#
# OldNetworkTopologyStrategy [formerly RackAwareStrategy]
# places one replica in each of two datacenters, and the third on a
# different rack in the first. Additional datacenters are not
# guaranteed to get a replica. Additional replicas after three are placed
# in ring order after the third without regard to rack or datacenter.
#
# - replication_factor: Number of replicas of each row
# - column_families: column families associated with this keyspace
#
# ColumnFamily required parameters:
# - name: name of the ColumnFamily. Must not contain the character "-".
# - compare_with: tells Cassandra how to sort the columns for slicing
# operations. The default is BytesType, which is a straightforward
# lexical comparison of the bytes in each column. Other options are
# AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
# and IntegerType (a generic variable-length integer type).
# You can also specify the fully-qualified class name to a class of
# your choice extending org.apache.cassandra.db.marshal.AbstractType.
#
# ColumnFamily optional parameters:
# - keys_cached: specifies the number of keys per sstable whose
# locations we keep in memory in "mostly LRU" order. (JUST the key
# locations, NOT any column values.) Specify a fraction (value less
# than 1) or an absolute number of keys to cache. Defaults to 200000
# keys.
# - rows_cached: specifies the number of rows whose entire contents we
# cache in memory. Do not use this on ColumnFamilies with large rows,
# or ColumnFamilies with high write:read ratios. Specify a fraction
# (value less than 1) or an absolute number of rows to cache.
# Defaults to 0. (i.e. row caching is off by default)
# - comment: used to attach additional human-readable information about
# the column family to its definition.
# - read_repair_chance: specifies the probability with which read
# repairs should be invoked on non-quorum reads. must be between 0
# and 1. defaults to 1.0 (always read repair).
# - gc_grace_seconds: specifies the time to wait before garbage
# collecting tombstones (deletion markers). defaults to 864000 (10
# days). See http://wiki.apache.org/cassandra/DistributedDeletes
# - default_validation_class: specifies a validator class to use for
# validating all the column values in the CF.
# - min_compaction_threshold: the minimum number of SSTables needed
# to start a minor compaction. increasing this will cause minor
# compactions to start less frequently and be more intensive. setting
# this to 0 disables minor compactions. defaults to 4.
# - max_compaction_threshold: the maximum number of SSTables allowed
# before a minor compaction is forced. decreasing this will cause
# minor compactions to start more frequently and be less intensive.
# setting this to 0 disables minor compactions. defaults to 32.
# - row_cache_save_period_in_seconds: number of seconds between saving
# row caches. The row caches can be saved periodically and if one
# exists on startup it will be loaded.
# - key_cache_save_period_in_seconds: number of seconds between saving
# key caches. The key caches can be saved periodically and if one
# exists on startup it will be loaded.
# - memtable_flush_after_mins: The maximum time to leave a dirty table
# unflushed. This should be large enough that it won't cause a flush
# storm of all memtables during periods of inactivity.
# - memtable_throughput_in_mb: The maximum size of the memtable before
# it is flushed. If undefined, 1/8 * heapsize will be used.
# - memtable_operations_in_millions: Number of operations in millions
# before the memtable is flushed. If undefined, throughput / 64 * 0.3
# will be used.
#
# Keyspace definitions. One keyspace, Twissjava, is declared with six
# column families: User, Userline, Followers, Tweet, Friends and Timeline.
#
# Nesting is keyspaces -> column_families -> column_metadata, and YAML
# indentation is what expresses it: every key of a column family must be
# aligned under that family's "- " list item, and the keyspace-level keys
# (name, replica_placement_strategy, replication_factor) must be aligned
# with column_families. Sequences are written flush with their parent key,
# two spaces per nesting level.
#
# The explicit "!!org.apache.cassandra.config.RawColumnDefinition" tag on
# each column_metadata entry is part of the data; keep it when editing.
keyspaces:
- column_families:
  # User: column names compared as UTF8Type; the "password" column is
  # validated as UTF8Type.
  - column_metadata:
    - !!org.apache.cassandra.config.RawColumnDefinition
      name: password
      validator_class: org.apache.cassandra.db.marshal.UTF8Type
    column_type: Standard
    comment: ''
    compare_with: org.apache.cassandra.db.marshal.UTF8Type
    gc_grace_seconds: 864000
    key_cache_save_period_in_seconds: 3600
    keys_cached: 200000.0
    max_compaction_threshold: 32
    memtable_flush_after_mins: 60
    min_compaction_threshold: 4
    name: User
    read_repair_chance: 1.0
    row_cache_save_period_in_seconds: 0
    rows_cached: 0.0
  # Userline: column names compared as LongType; all values validated as
  # LexicalUUIDType.
  - column_type: Standard
    comment: ''
    compare_with: org.apache.cassandra.db.marshal.LongType
    default_validation_class: org.apache.cassandra.db.marshal.LexicalUUIDType
    gc_grace_seconds: 864000
    key_cache_save_period_in_seconds: 3600
    keys_cached: 200000.0
    max_compaction_threshold: 32
    memtable_flush_after_mins: 60
    min_compaction_threshold: 4
    name: Userline
    read_repair_chance: 1.0
    row_cache_save_period_in_seconds: 0
    rows_cached: 0.0
  # Followers: column names and values both UTF8Type.
  - column_type: Standard
    comment: ''
    compare_with: org.apache.cassandra.db.marshal.UTF8Type
    default_validation_class: org.apache.cassandra.db.marshal.UTF8Type
    gc_grace_seconds: 864000
    key_cache_save_period_in_seconds: 3600
    keys_cached: 200000.0
    max_compaction_threshold: 32
    memtable_flush_after_mins: 60
    min_compaction_threshold: 4
    name: Followers
    read_repair_chance: 1.0
    row_cache_save_period_in_seconds: 0
    rows_cached: 0.0
  # Tweet: column names compared as UTF8Type; the "body" and "username"
  # columns are validated as UTF8Type.
  - column_metadata:
    - !!org.apache.cassandra.config.RawColumnDefinition
      name: body
      validator_class: org.apache.cassandra.db.marshal.UTF8Type
    - !!org.apache.cassandra.config.RawColumnDefinition
      name: username
      validator_class: org.apache.cassandra.db.marshal.UTF8Type
    column_type: Standard
    comment: ''
    compare_with: org.apache.cassandra.db.marshal.UTF8Type
    gc_grace_seconds: 864000
    key_cache_save_period_in_seconds: 3600
    keys_cached: 200000.0
    max_compaction_threshold: 32
    memtable_flush_after_mins: 60
    min_compaction_threshold: 4
    name: Tweet
    read_repair_chance: 1.0
    row_cache_save_period_in_seconds: 0
    rows_cached: 0.0
  # Friends: column names and values both UTF8Type.
  - column_type: Standard
    comment: ''
    compare_with: org.apache.cassandra.db.marshal.UTF8Type
    default_validation_class: org.apache.cassandra.db.marshal.UTF8Type
    gc_grace_seconds: 864000
    key_cache_save_period_in_seconds: 3600
    keys_cached: 200000.0
    max_compaction_threshold: 32
    memtable_flush_after_mins: 60
    min_compaction_threshold: 4
    name: Friends
    read_repair_chance: 1.0
    row_cache_save_period_in_seconds: 0
    rows_cached: 0.0
  # Timeline: column names compared as LongType; all values validated as
  # LexicalUUIDType.
  - column_type: Standard
    comment: ''
    compare_with: org.apache.cassandra.db.marshal.LongType
    default_validation_class: org.apache.cassandra.db.marshal.LexicalUUIDType
    gc_grace_seconds: 864000
    key_cache_save_period_in_seconds: 3600
    keys_cached: 200000.0
    max_compaction_threshold: 32
    memtable_flush_after_mins: 60
    min_compaction_threshold: 4
    name: Timeline
    read_repair_chance: 1.0
    row_cache_save_period_in_seconds: 0
    rows_cached: 0.0
  # Keyspace-level settings (siblings of column_families).
  name: Twissjava
  replica_placement_strategy: org.apache.cassandra.locator.SimpleStrategy
  replication_factor: 1