Des microservices efficaces et fiables


200 . JVM- - Cassandra, . , .


- ? , ? ? .


, , , .



c, . , 6 000 , 15 000 , , β€” , .


. , .


, - . , .



( , ).


, . , 95% 5% .


, β€” . 80% 13 . , β€” 13 .


, :



, . web- , .


, :


  • 600
  • 5
  • 100
  • 120
  • 8

, . , . , . , , .


β€” , :



β€” ID , ID ( ), , ( , , ), - , , .


:


-- Schematic (pseudo-CQL) layout of the message table: column types are omitted
-- and the trailing "…" stands for further columns not listed here.
CREATE TABLE Messages (
    -- key columns, repeated in PRIMARY KEY below
    chatId, msgId

    -- remaining per-message columns
    user, type, text, attachments[], terminal, deletedBy[], replyTo…

    PRIMARY KEY ( chatId, msgId )
)

:


  • getMessages(viewer, chat, from, to), . (, ).
  • getLastMessages(viewer, chats) . , .
  • , add(chat, message), search(viewer, text) indexMessages().

.





( , - ) , -, - .


. , . Cassandra.


DBA, , , :



, . getMessages().


:


-- Read one chat's messages whose msgId lies between :from and :to.
-- The select list was missing in the original listing; "*" matches the
-- Java form of the same query shown elsewhere in this document.
SELECT * FROM Messages
    WHERE chatId = ? AND
    msgId BETWEEN :from AND :to

: . , .


. β€” , , ? . , :



? , , 5% 95% . .


DBA , . , , .


, . getLastMessages() indexMessages() , . , , 95% , - , 5%. - , , , LRU. .


, , , , .


- , , Memcached, Redis Tarantool, . memcache .


, : , :



, .





β€” CPU . β€” , , , . , .


, memcache, . , , , , . , , .


:



? , .


. Atul Adya, Robert Grandl, Daniel Myers
Google Henry Qin , , , 85%.



27% , , low-latency .


β€” . , memcache, .


13 memcache, , 13, , , , 12 13, .


, 10% , 46% CPU 86% , .


β€” . memcache , -, .


, , .



N , . .


β€” . , .





memcache , : memcache .


memcache 99.99.


Redis Tarantool memcache, , .


NetCache — , memcache .


KV-Direct .


, - . , , . .





. , , .



:


-, , . .


-, /.


-, , , .


, «» . .


DBA . .


, , . , ? , , ?


, .





, , β€” . , . ?


1, 2 ( ) : , , .
3, 4: .
5 . .
6 β€” , . , memcache.
7 , . . - , , retries, - .


, , , . , memcache .



? memcache? , memcache , . ( ).


? , . .
memcache -, , . , .


, «» . , , - .



, , .


, , . , .


, , . , . . K, 1/3 K, .


, , . .



, . , . , . , .


. . , .


, ? ? . memcache , . K.


, p, P :


$P(K) = 1 - (1 - p)^3$


, . , :


$P(\tfrac{1}{3}K) = 1 - (1 - p)^3$


K, . :


$P(K) = p^3$


, , .


, p 0.1 ( 10%), , :



$P(K) = 1 - (1 - p)^3 = 1 - (1 - 0.1)^3 = 0.271$


:
$P(K) = p^3 = 0.1^3 = 0.001$


, .


, . , , memcache . : , , — p = 0.01 2 , p = 0.001 , .. , 1/3 K ( p = 0.01), (p = 0.001) .


, .




, !


? :


  • . , - .
  • , . , , , .

, , , , - . «», «» «» , - , .


β€” . . Java, , Java. : β€” , .


: open source, Java, — , Cassandra.


. Cassandra, , CassandraDaemon.


, , β€” classpath ( -cp cassandra/lib/*.jar ) - :


// Point Cassandra at its configuration by setting the "cassandra.config" system property.
System.setProperty( "cassandra.config", "file://whatever/cassandra.yaml" );
// Activate the daemon singleton from this JVM (presumably boots the embedded node; confirm against CassandraDaemon).
CassandraDaemon.instance.activate();

, CassandraDaemon (, Cassandra, , cassandra.yaml — ). activate(), .


cassandra.yaml , ConfigurationLoader, , Cassandra .


β€” β€” . , .



, . , B. - , , . , , :



. .


- β€” , A, B C. , , - : , .


B, . , .



, , . , B. , , .


. , .


. Cassandra . , .


Cassandra , :
Partition Key (chatId) — .
Clustering Key (msgId), , , .


-- Schematic (pseudo-CQL) message table; column types are omitted in this listing.
CREATE TABLE Messages (
    -- chatId is the partition key and msgId the clustering key (see PRIMARY KEY)
    chatId, msgId

    -- remaining per-message columns; "…" marks columns left out of the listing
    user, type, text, attachments[], terminal, deletedBy[], replyTo…

    PRIMARY KEY ( chatId, msgId )
) 

Partition Key . , Clustering Key, β€” . .


, β€” , Partition Key ID , Clustering Key β€” ID .



Cassandra :



, .


// Lookup from a token to the list of addresses returned for it by the replication strategy.
// The "…" initialisers are elided in this listing.
SortedMap<Token, List<InetAddress>> endpointMap = …

AbstractReplicationStrategy replication = …

// Fill the map with one entry per token from tokenMetadata.sortedTokens().
for (Token token : tokenMetadata.sortedTokens() ) {
    endpointMap.put( token, replication.getNaturalEndpoints( token ) );
}

endpointMap . endpointMap , . , , , .


, , 30 . , , , . , , . : , push- . , .


, , .



:


-- Schematic (pseudo-CQL) message table repeated for reference; types are omitted.
CREATE TABLE Messages (
    -- the two key columns named in PRIMARY KEY below
    chatId, msgId

    -- non-key columns; the list is abbreviated with "…"
    user, type, text, attachments[], terminal, deletedBy[], replyTo…

    PRIMARY KEY ( chatId, msgId )
) 

, :


-- Range read of a single chat: rows with msgId between :from and :to.
-- "*" added because the original listing had no select list.
SELECT * FROM Messages
    WHERE chatId = ? AND
    msgId BETWEEN :from AND :to

, .


? , .


import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.*;

// Try-with-resources closes the session when the block exits.
try (CqlSession session = CqlSession.builder().build()) { // (1) build a session using the builder's defaults
    ResultSet rs = session.execute("select release_version from system.local"); // (2) run the query and obtain a ResultSet
    Row row = rs.one(); // take a single row from the result
    System.out.println(row.getString("release_version")); // (3) print that row's release_version column
}

( , 1-3):


  1. ,
  2. , ResultSet

, - , .


Cassandra QueryProcessor, execute , :


package org.apache.cassandra.cql3;

import java.nio.ByteBuffer;

public class QueryProcessor
{
    public static UntypedResultSet execute(String query,
        ConsistencyLevel cl, Object… values)
        throws RequestExecutionException
}

, β€” . ? , . , . , , , .


, getMessages() :


// Read a chat's messages through QueryProcessor.execute at QUORUM consistency.
// NOTE(review): with the binds in the order (chatId, from, to) the predicate reads
// "msgId < from AND msgId > to"; confirm that this is the intended direction of the range.
UntypedResultSet rs = QueryProcessor.execute(
        "SELECT * FROM Messages "
        + "WHERE chatId = ? AND msgId < ? AND msgId > ?",   // comma was missing after the query string
        ConsistencyLevel.QUORUM, chatId, from, to );

rs.forEach ( row -> {} ); // per-row handling is left empty in this excerpt

add() :


// Insert through the same QueryProcessor.execute entry point; the column list and
// bind markers are abbreviated ("...") in this listing.
QueryProcessor.execute( "INSERT INTO Messages VALUES (?,?,?,...)", cl, values );

. QueryProcessor, . , , .





, . , . , :


  • 600
  • 100
  • 5
  • 5%
  • 13

, , :


  • 3+
  • 250
  • 500

. replication factor.


, , - (getMessages(), getLastMessages(), add(), search(), indexMessages()). , , .


getMessages() :


  • ok.ru . , - getMessages().
  • , .
  • , , QueryProcessor, .
  • .
  • , .

:



.


, , , :



, , add(). , :



, - :



, , getMessages(), . , . , :




, - , .


: , , . , - added():



, . ?


, , , , :



, , . , ? ? , - ?


, add() . , , . .


, Cassandra. INSERT , , () .



β€” , .



Cassandra .


, . Hint.



, - - .


Read Repair β€” . .



, , Streaming Repair β€” - .


, , . Streaming.



, . .


, , . , , (Hint, Read Repair Streaming Repair) , , .



listener' Cassandra :


/**
 * Listener with a single callback, {@link #onApply}. By its name it is meant to be
 * notified when a mutation is applied (confirm against the code that invokes it).
 */
interface ApplyMutationListener {

    /**
     * @param key      key as a byte buffer
     * @param deletion deletion time passed along with the key
     * @param atoms    iterator over the {@code Unfiltered} atoms
     */
    void onApply(ByteBuffer key, DeletionTime deletion, Iterator<Unfiltered> atoms);
}

, , Cassandra . , .


, , , .


package org.apache.cassandra.db

public class Keyspace
{
    public void apply( Mutation mutation,
        boolean writeCommitLog,
        boolean updateIndexes.
        boolean isDroppable )

// ...
}

Mutation . , listener.


Streaming, , (- Repair, - ), OnCompletionRunnable:


package org.apache.cassandra.streaming;

/** Excerpt of the stream-receive task; most members are elided in this listing. */
public class StreamReceiveTask extends StreamTask
{
    // holds references to SSTables received
    protected Collection<SSTableReader> sstables;

    /** Nested runnable; by its name it runs on completion (confirm upstream). Body elided. */
    private static class OnCompletionRunnable implements Runnable {

        // ...
    }
}

, ApplyMutationListener.



: , , , - , , .



, . Hints , :



getMessages() . , , .



, . , , WAL. , WAL. Cassandra , , , . keyspace, LocalStrategy:


-- Keyspace for the cache tables, declared with the LocalStrategy replication class.
-- NOTE(review): stock Cassandra may reject LocalStrategy for user keyspaces; confirm
-- that the Cassandra build in use accepts this statement.
CREATE KEYSPACE Caches
    WITH REPLICATION = {
    'class': 'LocalStrategy'
    }

LocalStrategy , keyspace , .


keyspace :


-- Snapshot table in the Caches keyspace: one value blob per rowkey blob.
CREATE TABLE Caches.MessagesSnapshot (
    rowkey blob PRIMARY KEY,
    value  blob
)

put , , eviction β€” :



, , , :


-- Full scan of the snapshot table; qualified with its keyspace because the table
-- is created as Caches.MessagesSnapshot and no USE statement is shown.
SELECT * FROM Caches.MessagesSnapshot

, β€” , , .


β€” β€” .


β€” , (, - ). .


, . β€” mapped byte buffer : /dev/shm/msgs-cache.mem.


, , one-nio, GitHub. SharedMemoryMap , , SharedMemory.


, /dev/shm tmpfs, . hugetlbfs, , , , TLB misses .


, , SharedMemoryMap , .



, : , , . … .



? by design. Cassandra , .


15 , , .


: , , , . , .



, , β€” . , .


, : , , « ». , «». .


: , , , , .




!


, .


getLastMessages(chats[]) — .


, . , , .


, , . , , .


, , , . , , Cassandra: hints, read repair, streaming repair.


, . , - , .


? , memcaches , , :



, . β€” , .


«» ( ), , ID .



β€” ( ), , , .


, , .



. - , , . , , , :



, , , . , :



API Cassandra . - Cassandra JVM, API.


- :


/**
 * Reads the last message of a chat with a local read, following the steps visible
 * in this listing: prepare the statement, bind the chat id, build the read query,
 * and call {@code executeLocally()}.
 *
 * <p>The original listing declared a {@code Message} return but never returned,
 * called {@code partitions.next()} without checking {@code hasNext()}, and did not
 * close the iterators. This version addresses those three points.
 *
 * @param chatId id of the chat, bound to the single {@code ?} marker
 * @return the message decoded by the unmarshalling code, or {@code null} when the
 *         read yields no partition or nothing was decoded
 */
public Message getLastMessage(Long chatId) {

    Prepared prepared = QueryProcessor.prepareInternal( "SELECT … WHERE chatId=?" );
    SelectStatement select = (SelectStatement) prepared.statement;

    QueryOptions opt = QueryProcessor.makeInternalOptions( prepared, chatId );

    ReadQuery query = select.getQuery( opt, FBUtilities.nowInSeconds() );

    // One-slot holder so the lambda below can hand the decoded message back out.
    Message[] last = new Message[1];

    // NOTE(review): the controller returned by executionController() is not closed here;
    // if it is closeable in the Cassandra version in use, wrap it in try-with-resources too.
    try (UnfilteredPartitionIterator partitions = query.executeLocally( query.executionController() )) {
        if (!partitions.hasNext()) {
            return null; // no partition for this chat
        }
        try (UnfilteredRowIterator partition = partitions.next()) {
            partition.forEachRemaining( atom -> { /* unmarshalling code; store the result in last[0] */ } );
        }
    }
    return last[0];
}

- , : statement, , executeLocally(). , , , .



: . - .



, β€” . (, , - ).


, , , Apache Lucene.


. , ( ). β€” .


( ) , .


, . -. add(), , , -, IndexWriter.addDocument(), IndexWriter.commit():



: , .


, . , , . .


. . IndexWriter.addDocument(), IndexWriter.commit() . ? , , . , .


? , , - , . , , , .


Cassandra , . Compaction. , Cassandra .


Clustering Key, , , , .


Cassandra :


package org.apache.cassandra.db.compaction;

/** Excerpt of the compaction manager class; its members are elided in this listing. */
public class CompactionManager implements CompactionManagerMBean {
// … (body omitted)
}

, , , .


, , Compaction, , β€” . .





, .


, β€” .


- , . ?


  • , . write-ahead .
  • , — Caches.MessagesSnapshot. — , . CPU.
  • , , , .
  • .

?


, .


: , , - . -, .


: .


: , , -.


DBA : .



DBA , . ?


, , . β€” , . , , , -.


β€” .


, , JVM. - , , , . , , , .


? , , : - , .


, β€” -, . , - .


β€” - , , .


, β€” .


API, . DBA: API, ! ?


, . Cassandra, API, , , . , β€” .


, Cassandra, rolling upgrades. , . . c , , -, , , , - .


β€” , - .





  • , .
  • ́ . , , . .
  • , Cassandra one-nio.
  • . , , , : , , , , .

, , :



All Articles