关于开发人员是否需要在面试中编写算法代码的争论是无止境的。为了支持肯定的答案,我已经发表了一个有关用Yandex编写代码的算法部分的故事,以及在那里可以找到的任务示例。现在,我想发展这个主题并展示真实生产代码的示例。

所有示例都是由特定的开发人员在解决常规任务时编写的。在发布之前,我并未对代码进行任何改进,只是在某些地方对其进行了修改,以便在不了解我们的代码基础的情况下可以理解它。因此,某些代码示例对您来说似乎还不够酷,但是在持续不断的截止日期压力下,不可能完全研磨整个代码。
. C++, TypeScript Python. — , .
1. .
2016- .. , , . :

.
, , « », . . - - — , , : « », «», «». , - - «» «», .
. , , : - , ; . , -, . :
bool ContainsStopHash(const TString& text,
TEasyParser& easyParser,
const THashSet<size_t>& stopHashes)
{
std::vector<TString> words;
easyParser.ParseUTF8Text(text, &words);
size_t unigramHash = 0;
size_t bigramHash = 0;
for (const TString& word : words) {
const size_t lastUnigramHash = unigramHash;
const size_t wordHash = word.hash();
unigramHash = wordHash;
bigramHash ^= wordHash;
if (stopHashes.contains(unigramHash) || stopHashes.contains(bigramHash)) {
return true;
}
bigramHash ^= lastUnigramHash;
}
return false;
}
TEasyParser
— , , , THashSet
— std::unordered_set
, TString
— .
— , : , . , , .
2. Reservoir sampling MapReduce
, - , . , , .
. , — , - - .

, , , , - . , std::shuffle.
, . , . , ; i
, 0
i
.
. , , K
— , . K
, , K
. std::shuffle : K
— , , .
: i
- 0
i
. , K
— , - . reducer' :
void Do(TMRReader* input, TMRWriter* output) override {
TVector<TNode> sample;
TMersenne<ui64> mersenne;
size_t passedItems = 0;
for (; input->IsValid(); input->Next()) {
++passedItems;
size_t position = mersenne.GenRand64() % passedItems;
if (position >= ItemsToTake) {
continue;
}
if (sample.size() < ItemsToTake) {
sample.push_back(input->GetRow());
} else {
sample[position] = input->GetRow();
}
}
Shuffle(sample.begin(), sample.end(), mersenne);
for (const TNode& node : sample) {
output->Add(node);
}
}
TMersenne
— mersenne twister, , TNode
— , MapReduce-.
, .
3. TypeScript
-. — .
url' . , url /action?param=¶m=1;2;3¶m=8
, url :
{
"param" : ["", "1;2;3", "8"]
}
, . , , :
{
"param" : ["1", "2", "3", "8"]
}
:
export type TParamValue = string | string[];
export type TParams = Record<string, TParamValue>;
export function normalizeParams(params: TParams): TParams {
const result = {};
for (const [paramName, paramValue] of Object.entries(params)) {
// query-
if (Array.isArray(paramValue)) {
result[paramName] = paramValue.reduce((acc, part) => {
if (part) {
acc = acc.concat(part.split(';').filter(Boolean));
}
return acc;
}, []);
} else if (paramValue) {
result[paramName] = paramValue.split(';').filter(Boolean);
}
}
return result;
}
. — . — : , , .
4. Python
-; , , , . , . , !
— . — , . , , , , - - .
count = 0
firstPos = -1
clicks = 0
for block in blocks:
result = bl.GetMainResult()
if result.IsA('TWebResult'):
url = result.Url
pos = result.Pos
propValue = result.PropValue
if propValue in interestingValues:
count += 1
if firstPos == -1:
firstPos = pos
for cl in bl.GetClicks():
clicks += 1
yield Record(count=count,firstPos=firstPos,clicks=clicks)
set
, .
, — , , .
5.
, , , — . , YouTube (, ) .
. , . , .
def foo(nums):
current = 0
best = 0
for n in nums:
if n > 0:
current += 1
best = max(best, current)
else:
current = 0
return best
. . , : . .
def dictFromString(s):
d = defaultdict(int)
for c in s:
d[c] += 1
return d
def areAnagrams(a, b):
return dictFromString(a) == dictFromString(b)
. : n
. 2n
.
def generate(cur, open, closed, n):
if len(cur) == 2 * n:
print cur
return
if open < n:
generate(cur + '(', open + 1, closed, n)
if closed < open:
generate(cur + ')', open, closed + 1, n)
def parens(n):
generate('', 0, 0, n)
, , :
, , — , machine learning, , , , . , -: , , .
aka nkmakarov, , , !