👈 🍼 👩🏽‍⚕️ Yandex开发人员每天执行哪些算法 💅🏿 🤜🏼 🗝️

关于开发人员是否需要在面试中编写算法代码的争论是无止境的。为了支持肯定的答案，我已经发表了一个有关用Yandex编写代码的算法部分的故事，以及在那里可以找到的任务示例。现在，我想发展这个主题并展示真实生产代码的示例。

所有示例都是由特定的开发人员在解决常规任务时编写的。在发布之前，我并未对代码进行任何改进，只是在某些地方对其进行了修改，以便在不了解我们的代码基础的情况下可以理解它。因此，某些代码示例对您来说似乎还不够酷，但是在持续不断的截止日期压力下，不可能完全研磨整个代码。

. C++, TypeScript Python. — , .

1. .

2016- .. , , . :

, , « », . . - - — , , : « », «», «». , - - «» «», .

. , , : - , ; . , -, . :

bool ContainsStopHash(const TString& text,
                      TEasyParser& easyParser,
                      const THashSet<size_t>& stopHashes)
{
    std::vector<TString> words;
    easyParser.ParseUTF8Text(text, &words);

    size_t unigramHash = 0;
    size_t bigramHash = 0;

    for (const TString& word : words) {
        const size_t lastUnigramHash = unigramHash;
        const size_t wordHash = word.hash();
        unigramHash = wordHash;
        bigramHash ^= wordHash;
        if (stopHashes.contains(unigramHash) || stopHashes.contains(bigramHash)) {
            return true;
        }
        bigramHash ^= lastUnigramHash;
    }
    return false;
}

TEasyParser — , , , THashSet — std::unordered_set, TString — .

— , : , . , , .

2. Reservoir sampling MapReduce

, - , . , , .

. , — , - - .

, , , , - . , std::shuffle.

, . , . , ; i, 0 i .

. , , K — , . K , , K . std::shuffle : K — , , .

: i- 0 i . , K — , - . reducer' :

void Do(TMRReader* input, TMRWriter* output) override {
    TVector<TNode> sample;

    TMersenne<ui64> mersenne;
    size_t passedItems = 0;

    for (; input->IsValid(); input->Next()) {
        ++passedItems;
        size_t position = mersenne.GenRand64() % passedItems;

        if (position >= ItemsToTake) {
            continue;
        }

        if (sample.size() < ItemsToTake) {
            sample.push_back(input->GetRow());
        } else {
            sample[position] = input->GetRow();
        }
    }

    Shuffle(sample.begin(), sample.end(), mersenne);
    for (const TNode& node : sample) {
        output->Add(node);
    }
}

TMersenne — mersenne twister, , TNode — , MapReduce-.

, .

3. TypeScript

-. — .

url' . , url /action?param=¶m=1;2;3¶m=8, url :

{
    "param" : ["", "1;2;3", "8"]
}

, . , , :

{
    "param" : ["1", "2", "3", "8"]
}

export type TParamValue = string | string[];
export type TParams = Record<string, TParamValue>;

export function normalizeParams(params: TParams): TParams {
    const result = {};

    for (const [paramName, paramValue] of Object.entries(params)) {
        //        query-
        if (Array.isArray(paramValue)) {
            result[paramName] = paramValue.reduce((acc, part) => {
                if (part) {
                    acc = acc.concat(part.split(';').filter(Boolean));
                }

                return acc;
            }, []);
        } else if (paramValue) {
            result[paramName] = paramValue.split(';').filter(Boolean);
        }
    }

    return result;
}

. — . — : , , .

4. Python

-; , , , . , . , !

— . — , . , , , , - - .

count = 0
firstPos = -1
clicks = 0

for block in blocks:
    result = bl.GetMainResult()
    if result.IsA('TWebResult'):
        url = result.Url
        pos = result.Pos
        propValue = result.PropValue

        if propValue in interestingValues:
            count += 1

            if firstPos == -1:
                firstPos = pos

            for cl in bl.GetClicks():
                clicks += 1

yield Record(count=count,firstPos=firstPos,clicks=clicks)

set, .

, — , , .

5.

, , , — . , YouTube (, ) .

. , . , .

def foo(nums):
    current = 0
    best = 0
    for n in nums:
        if n > 0:
            current += 1
            best = max(best, current)
        else:
            current = 0
    return best

. . , : . .

def dictFromString(s):
    d = defaultdict(int)
    for c in s:
        d[c] += 1
    return d

def areAnagrams(a, b):
    return dictFromString(a) == dictFromString(b)

. : n. 2n .

def generate(cur, open, closed, n):
    if len(cur) == 2 * n:
        print cur
        return
    if open < n:
        generate(cur + '(', open + 1, closed, n)
    if closed < open:
        generate(cur + ')', open, closed + 1, n)
def parens(n):
    generate('', 0, 0, n)

, , :

;
, ;
(, );
; , .

, , — , machine learning, , , , . , -: , , .

aka nkmakarov, , , !

Yandex开发人员每天执行哪些算法

1. .

2. Reservoir sampling MapReduce

3. TypeScript

4. Python

5.

More articles: