What algorithms do Yandex developers implement every day

The debate about whether developers need to write algorithmic code for interviews is endless. In support of a positive answer, I have already published a story about algorithmic sections with writing code in Yandex and examples of tasks that can be found there. Now I want to develop this topic and show examples of real production code.




All examples were once written by specific developers in the process of solving fairly routine tasks. I didn’t improve the code before publication, I only adapted it in places so that it was understandable without getting to know our code base. Therefore, some code examples may not seem cool enough to you, but in the conditions of constant pressure of deadlines it is impossible to grind absolutely the entire code.


. C++, TypeScript Python. — , .


1. .


2016- .. , , . :



.


, , « », . . - - — , , : « », «», «». , - - «» «», .


. , , : - , ; . , -, . :


bool ContainsStopHash(const TString& text,
                      TEasyParser& easyParser,
                      const THashSet<size_t>& stopHashes)
{
    std::vector<TString> words;
    easyParser.ParseUTF8Text(text, &words);

    size_t unigramHash = 0;
    size_t bigramHash = 0;

    for (const TString& word : words) {
        const size_t lastUnigramHash = unigramHash;
        const size_t wordHash = word.hash();
        unigramHash = wordHash;
        bigramHash ^= wordHash;
        if (stopHashes.contains(unigramHash) || stopHashes.contains(bigramHash)) {
            return true;
        }
        bigramHash ^= lastUnigramHash;
    }
    return false;
}

TEasyParser — , , , THashSetstd::unordered_set, TString — .


— , : , . , , .


2. Reservoir sampling MapReduce


, - , . , , .


. , — , - - .




, , , , - . , std::shuffle.


, . , . , ; i, 0 i .


. , , K — , . K , , K . std::shuffle : K — , , .


: i- 0 i . , K — , - . reducer' :


void Do(TMRReader* input, TMRWriter* output) override {
    TVector<TNode> sample;

    TMersenne<ui64> mersenne;
    size_t passedItems = 0;

    for (; input->IsValid(); input->Next()) {
        ++passedItems;
        size_t position = mersenne.GenRand64() % passedItems;

        if (position >= ItemsToTake) {
            continue;
        }

        if (sample.size() < ItemsToTake) {
            sample.push_back(input->GetRow());
        } else {
            sample[position] = input->GetRow();
        }
    }

    Shuffle(sample.begin(), sample.end(), mersenne);
    for (const TNode& node : sample) {
        output->Add(node);
    }
}

TMersennemersenne twister, , TNode — , MapReduce-.


, .


3. TypeScript


-. — .


url' . , url /action?param=¶m=1;2;3¶m=8, url :


{
    "param" : ["", "1;2;3", "8"]
}

, . , , :


{
    "param" : ["1", "2", "3", "8"]
}

:


export type TParamValue = string | string[];
export type TParams = Record<string, TParamValue>;

export function normalizeParams(params: TParams): TParams {
    const result = {};

    for (const [paramName, paramValue] of Object.entries(params)) {
        //        query-
        if (Array.isArray(paramValue)) {
            result[paramName] = paramValue.reduce((acc, part) => {
                if (part) {
                    acc = acc.concat(part.split(';').filter(Boolean));
                }

                return acc;
            }, []);
        } else if (paramValue) {
            result[paramName] = paramValue.split(';').filter(Boolean);
        }
    }

    return result;
}

. — . — : , , .


4. Python


-; , , , . , . , !


— . — , . , , , , - - .


count = 0
firstPos = -1
clicks = 0

for block in blocks:
    result = bl.GetMainResult()
    if result.IsA('TWebResult'):
        url = result.Url
        pos = result.Pos
        propValue = result.PropValue

        if propValue in interestingValues:
            count += 1

            if firstPos == -1:
                firstPos = pos

            for cl in bl.GetClicks():
                clicks += 1

yield Record(count=count,firstPos=firstPos,clicks=clicks)

set, .


, — , , .


5.


, , , — . , YouTube (, ) .


. , . , .


def foo(nums):
    current = 0
    best = 0
    for n in nums:
        if n > 0:
            current += 1
            best = max(best, current)
        else:
            current = 0
    return best

. . , : . .


def dictFromString(s):
    d = defaultdict(int)
    for c in s:
        d[c] += 1
    return d

def areAnagrams(a, b):
    return dictFromString(a) == dictFromString(b)

. : n. 2n .


def generate(cur, open, closed, n):
    if len(cur) == 2 * n:
        print cur
        return
    if open < n:
        generate(cur + '(', open + 1, closed, n)
    if closed < open:
        generate(cur + ')', open, closed + 1, n)
def parens(n):
    generate('', 0, 0, n)

, , :


  • ;
  • , ;
  • (, );
  • ; , .

, , — , machine learning, , , , . , -: , , .




aka nkmakarov, , , !


All Articles