
200 . JVM- - Cassandra, . , .
- ? , ? ? .
, , , .
c, . , 6 000 , 15 000 , , β , .
. , .
, - . , .

( , ).
, . , 95% 5% .
, β . 80% 13 . , β 13 .
, :

, . web- , .
, :
, . , . , . , , .
β , :

β ID , ID ( ), , ( , , ), - , , .
:
-- Schematic definition of the Messages table (column types are omitted).
CREATE TABLE Messages (
    chatId, msgId,
    user, type, text, attachments[], terminal, deletedBy[], replyTo…,
    PRIMARY KEY ( chatId, msgId )
)
:
- getMessages(viewer, chat, from, to), . (, ).
- getLastMessages(viewer, chats) . , .
- , add(chat, message), search(viewer, text) indexMessages().
.
( , - ) , -, - .
. , . Cassandra.
DBA, , , :

, . getMessages().
:
-- Range read within one chat: chatId is bound positionally, msgId is limited
-- to the inclusive range given by :from and :to.
SELECT * FROM Messages
WHERE chatId = ? AND
      msgId BETWEEN :from AND :to
: . , .
. β , , ? . , :

? , , 5% 95% . .
DBA , . , , .
, . getLastMessages() indexMessages() , . , , 95% , - , 5%. - , , , LRU. .
, , , , .
- , , Memcached, Redis Tarantool, . memcache .
, : , :

, .
β CPU . β , , , . , .
, memcache, . , , , , . , , .
:

? , .
. Atul Adya, Robert Grandl, Daniel Myers
Google Henry Qin , , , 85%.

27% , , low-latency .
β . , memcache, .
13 memcache, , 13, , , , 12 13, .
, 10% , 46% CPU 86% , .
β . memcache , -, .
, , .

N , . .
β . , .
memcache , : memcache .
memcache 99.99.
Redis Tarantool memcache, , .
NetCache — , memcache .
KV-Direct .
, - . , , . .
. , , .

:
-, , . .
-, /.
-, , , .
, «» . .
DBA . .
, , . , ? , , ?
, .
, , β . , . ?
1, 2 ( ) : , , .
3, 4: .
5 . .
6 — , . , memcache.
7 , . . - , , retries, - .
, , , . , memcache .

? memcache? , memcache , . ( ).
? , . .
memcache -, , . , .
, «» . , , - .

, , .
, , . , .
, , . , . . K, 1/3 K, .
, , . .

, . , . , . , .
. . , .
, ? ? . memcache , . K.
, p, P :
$P(K) = 1 - (1 - p)^3$
, . , :
$P(\tfrac{1}{3}K) = 1 - (1 - p)^3$
K, . :
$P(K) = p^3$
, , .
, p 0.1 ( 10%), , :
$P(K) = 1 - (1 - p)^3 = 1 - (1 - 0.1)^3 = 0.271$
:
$P(K) = p^3 = 0.1^3 = 0.001$
, .
, . , , memcache . : , , β p = 0.01 2 , p = 0.001 , .. , 1/3 K ( p = 0.01), (p = 0.001) .
, .
, !
? :
, , , , - . «», «» «» , - , .
β . . Java, , Java. : β , .
: open source, Java, β , Cassandra.
. Cassandra, , CassandraDaemon.
, , β classpath ( -cp cassandra/lib/*.jar ) - :
System.setProperty( "cassandra.config", "file://whatever/cassandra.yaml" );
CassandraDaemon.instance.activate();
, CassandraDaemon (, Cassandra, , cassandra.yaml — ). activate(), .
cassandra.yaml , ConfigurationLoader, , Cassandra .
β β . , .
, . , B. - , , . , , :

. .
- β , A, B C. , , - : , .
B, . , .

, , . , B. , , .
. , .
. Cassandra . , .
Cassandra , :
Partition Key (chatId) — .
Clustering Key (msgId), , , .
-- Schematic definition of the Messages table (column types are omitted).
CREATE TABLE Messages (
    chatId, msgId,
    user, type, text, attachments[], terminal, deletedBy[], replyTo…,
    PRIMARY KEY ( chatId, msgId )
)
Partition Key . , Clustering Key, β . .
, β , Partition Key ID , Clustering Key β ID .

Cassandra :
, .
SortedMap<Token, List<InetAddress>> endpointMap = β¦
AbstractReplicationStrategy replication = β¦
for (Token token : tokenMetadata.sortedTokens() ) {
endpointMap.put( token, replication.getNaturalEndpoints( token ) );
}
endpointMap . endpointMap , . , , , .
, , 30 . , , , . , , . : , push- . , .
, , .
:
-- Schematic definition of the Messages table (column types are omitted).
CREATE TABLE Messages (
    chatId, msgId,
    user, type, text, attachments[], terminal, deletedBy[], replyTo…,
    PRIMARY KEY ( chatId, msgId )
)
, :
-- Range read within one chat: chatId is bound positionally, msgId is limited
-- to the inclusive range given by :from and :to.
SELECT * FROM Messages
WHERE chatId = ? AND
      msgId BETWEEN :from AND :to
, .
? , .
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.*;
// Open a driver session, read release_version from system.local and print it;
// try-with-resources closes the session afterwards.
try (CqlSession session = CqlSession.builder().build()) {
    Row versionRow = session
            .execute("select release_version from system.local")
            .one();
    String releaseVersion = versionRow.getString("release_version");
    System.out.println(releaseVersion);
}
( , 1-3):
- ,
- , ResultSet
, - , .
Cassandra QueryProcessor, execute , :
package org.apache.cassandra.cql3;
import java.nio.ByteBuffer;
public class QueryProcessor
{
public static UntypedResultSet execute(String query,
ConsistencyLevel cl, Object⦠values)
throws RequestExecutionException
}
, β . ? , . , . , , , .
, getMessages() :
// Read one chat's messages whose msgId lies in [from, to] at QUORUM consistency.
// The bind order is (chatId, from, to), so the lower bound must be compared first;
// the inclusive bounds mirror "msgId BETWEEN :from AND :to".
UntypedResultSet rs = QueryProcessor.execute(
    "SELECT * FROM Messages "
        + "WHERE chatId = ? AND msgId >= ? AND msgId <= ?",
    ConsistencyLevel.QUORUM, chatId, from, to );
rs.forEach( row -> {} );
add() :
// Insert through the same QueryProcessor.execute() entry point.
// NOTE(review): the "..." inside the statement presumably stands for the
// remaining bind markers — confirm before reusing this verbatim.
QueryProcessor.execute( "INSERT INTO Messages VALUES (?,?,?,...)", cl, values );
. QueryProcessor, . , , .
, . , . , :
, , :
. replication factor.
, , - (getMessages(), getLastMessages(), add(), search(), indexMessages()). , , .
getMessages() :
- ok.ru . , - getMessages().
- , .
- , , QueryProcessor, .
- .
- , .
:

.
, , , :

, , add(). , :

, - :

, , getMessages(), . , . , :

, - , .
: , , . , - added():

, . ?
, , , , :

, , . , ? ? , - ?
, add() . , , . .
, Cassandra. INSERT , , () .

β , .

Cassandra .
, . Hint.

, - - .
Read Repair — . .

, , Streaming Repair β - .
, , . Streaming.

, . .
, , . , , (Hint, Read Repair Streaming Repair) , , .

listener' Cassandra :
/**
 * Listener with a single callback, {@code onApply}, which receives a key,
 * a deletion time and an iterator over unfiltered atoms.
 */
interface ApplyMutationListener
{
    void onApply( ByteBuffer key, DeletionTime deletion, Iterator<Unfiltered> atoms );
}
, , Cassandra . , .
, , , .
package org.apache.cassandra.db
public class Keyspace
{
public void apply( Mutation mutation,
boolean writeCommitLog,
boolean updateIndexes.
boolean isDroppable )
}
Mutation . , listener.
Streaming, , (- Repair, - ), OnCompletionRunnable:
package org.apache.cassandra.streaming;

/** Excerpt: only the members relevant to the surrounding text are shown. */
public class StreamReceiveTask extends StreamTask
{
    protected Collection<SSTableReader> sstables;

    /** Nested runnable; its body is omitted in this excerpt. */
    private static class OnCompletionRunnable implements Runnable {
    }
}
, ApplyMutationListener.
: , , , - , , .

, . Hints , :

getMessages() . , , .

, . , , WAL. , WAL. Cassandra , , , . keyspace, LocalStrategy:
-- Creates the keyspace Caches with replication class 'LocalStrategy'.
CREATE KEYSPACE Caches
WITH REPLICATION = {
'class': 'LocalStrategy'
}
LocalStrategy , keyspace , .
keyspace :
-- Table in keyspace Caches: a blob key column (rowkey, the primary key)
-- and a blob value column.
CREATE TABLE Caches.MessagesSnapshot (
rowkey blob,
value blob,
PRIMARY KEY ( rowkey )
)
put , , eviction β :

, , , :
-- Full read of the snapshot table; keyspace-qualified to match its definition.
SELECT * FROM Caches.MessagesSnapshot
, β , , .
β β .
β , (, - ). .
, . — mapped byte buffer : /dev/shm/msgs-cache.mem.
, , one-nio, GitHub. SharedMemoryMap , , SharedMemory.
, /dev/shm tmpfs, . hugetlbfs, , , , TLB misses .
, , SharedMemoryMap , .
, : , , . … .

? by design. Cassandra , .
15 , , .
: , , , . , .

, , β . , .
, : , , « ». , «». .
: , , , , .
!
, .
getLastMessages(chats[]) — .
, . , , .
, , . , , .
, , , . , , Cassandra: hints, read repair, streaming repair.
, . , - , .
? , memcaches , , :

, . β , .
«» ( ), , ID .

β ( ), , , .
, , .
. - , , . , , , :

, , , . , :

API Cassandra . - Cassandra JVM, API.
- :
/**
 * Prepares an internal SELECT bound to {@code chatId}, runs it through
 * {@code executeLocally()} and walks the atoms of the first returned partition.
 * <p>
 * NOTE(review): this is an excerpt — the SELECT column list is elided ("…") and
 * the code that builds and returns the {@code Message} is not shown.
 *
 * @param chatId value bound to the single {@code ?} marker of the statement
 */
public Message getLastMessage(Long chatId) {
    Prepared prepared = QueryProcessor.prepareInternal( "SELECT … WHERE chatId=?" );
    SelectStatement select = (SelectStatement) prepared.statement;
    QueryOptions opt = QueryProcessor.makeInternalOptions( prepared, chatId );
    ReadQuery query = select.getQuery( opt, FBUtilities.nowInSeconds() );
    // Close both iterators even when iteration throws.
    try ( UnfilteredPartitionIterator partitions = query.executeLocally( query.executionController() );
          UnfilteredRowIterator partition = partitions.next() ) {
        partition.forEachRemaining( atom -> { } );
    }
    // NOTE(review): the object returned by executionController() presumably needs
    // closing as well — confirm against the Cassandra version in use.
}
- , : statement, , executeLocally(). , , , .
: . - .

, β . (, , - ).
, , , Apache Lucene.
. , ( ). β .
( ) , .
, . -. add(), , , -, IndexWriter.addDocument(), IndexWriter.commit():

: , .
, . , , . .
. . IndexWriter.addDocument(), IndexWriter.commit() . ? , , . , .
? , , - , . , , , .
Cassandra , . Compaction. , Cassandra .
Clustering Key, , , , .
Cassandra :
package org.apache.cassandra.db.compaction;
/** Excerpt: only the class declaration is shown; the class body is omitted. */
public class CompactionManager implements CompactionManagerMBean {
}
, , , .
, , Compaction, , β . .
, .
, β .
- , . ?
- , . write-ahead .
- , β Caches.MessagesSnapshot. β , . CPU.
- , , , .
- .
?
, .
: , , - . -, .
: .
: , , -.
DBA : .

DBA , . ?
, , . β , . , , , -.
β .
, , JVM. - , , , . , , , .
? , , : - , .
, β -, . , - .
β - , , .
, β .
API, . DBA: API, ! ?
, . Cassandra, API, , , . , β .
, Cassandra, rolling upgrades. , . . c , , -, , , , - .
β , - .
- , .
- Μ . , , . .
- , Cassandra one-nio.
- . , , , : , , , , .
, , :