एक बजट पर किसे अच्छा रहना चाहिए?


परिचय


   тАФ ,
   тАФ ,
  
  :
 ,
 ,
 ,
 ,
  :
, ,
, .
,  тАФ
 ,
 тАФ  :
  ,
  ?

.

कुछ महीने पहले, एक आईटी कार्यक्रम में, मुझे Pandas का काम देखने का मौका मिला। उसके साथ काम करने वाले व्यक्ति ने कुछ भी आश्चर्यजनक नहीं किया। लेकिन मूल्यों का साधारण जोड़, औसत की गणना, समूहीकरण इतनी महारत से अंजाम दिए गए कि, पायथन के प्रति मेरे सभी पूर्वाग्रहों के बावजूद, मैं मोहित हो गया। 2004 से 2019 तक की अवधि के पूंजीगत मरम्मत के काफी बड़े डेटासेट पर हेरफेर किए गए थे। सैकड़ों हजारों पंक्तियां, लेकिन सब कुछ बहुत तेजी से काम करता था।


- , Pandas. , Excel . .


. , , . , , . , , , , . - "barchart race" - " ". , barchart race. , . Barchart race , , , - , . , .



"" , , . , , . , , . . , , - . :


  1. , .
  2. , , - . .

:


. .



: " ". . . - . .


тАФ 20- , , . , - , (, ), , , , .


- , . , 7 , 3 тАФ ( ) 4, 5 тАФ , 6, 7 тАФ . , , , .


:




    • тАФ

    • тАФ
    • тАФ
    • тАФ
      • тАФ
      • тАФ
      • тАФ
      • тАФ

    • тАФ

, , .


, . , . . . , , . 3 10 , . . . . 164. .


.


. 0507011, 2002 2019. - 2007 .


, , 2002 2019 .


, 0501 2003 " ", 2006 " ". 01 . 0103 2002 " ", 2005 " () ".


, . , , . 02 , 03 . . , .


. , , 1986 A Manual on Government Finance Statistics (GFSM 1986). https://www.imf.org/external/pubs/ft/gfs/manual/1986/eng/index.htm
, : 2001 2014 . GFSM 2014 .


4 "Functional classification", . Classicifation of the Functions of Government COFOG. - (). , . 4 : 2 тАФ , 3 4 . , . 2002 . , , .


, .
COFOG, COFOG.


, , COFOG , . . , . . , , , " " , 01 ( ) . , , . , . . - . .


: " , -!"


. -


. , outer join . :



. , . . , . 2002 2019 240 , , . 95 . 2002 240, 17 8 . .


:



html :
https://vneberu.ru/


, , . . .


. ?


1 . 01 , . 0101.


17 , . , 01 , .. " " 01. , 01, . , GFSM 2014, - . , 70310 , , , , , ;


01 , - . . - . - . Pandas, . levenshtein_merge(). join Pandas DataFrame , . , . , , , , pip install Pandas DataFrame. , prepack.


2 , jupyter notebook .


#    
import pandas as pd
import numpy as np
import os, sys

#    
from prepack import prepack as pp

# Raise the pandas display limits so long and wide frames are shown in the notebook
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 200
pd.options.display.max_colwidth = 500
pd.options.display.min_rows = 40

prepack. zip . file-like ,
. pd.read_excel(), excel.


# Open the archive with prepack; the call yields two values, kept as `names` and `files`.
# NOTE(review): presumably member names and file-like objects -- confirm in prepack.
names, files = pp.read_zip("raw_data.zip") 

pd.read_excel() , excel .


read_excel excel , .


, . . , .


pp.read_excel(files[0]).iloc[8:13,:]


012345678910111213141516171819202122232425262728
8(. .)(. .)
9\n┬л 2002 ┬╗,%
10
1112345678891010
1202010443678.7000000011082345210307275.310259178.00000000295.2309420321723894.78656162562555


, . . excel, 1 DataFrame, , .


3 : files, columns, fltr. excel , тАФ , . тАФ , , . 4 . .


, , 0 11 , , , . , . 3 :
8,9,10


# Parse every workbook in `files` into a single DataFrame.
# NOTE(review): the argument meanings are not visible here -- presumably `columns`
# is the column range, `fltr` keeps rows whose column 0 is text and column 1 is
# numeric, and `header` lists the header rows; confirm against prepack.
df = pp.parse_excels(files, columns=[0,12], fltr={0: 'istext',1: 'isnum'}, header=[8,9,10])

# Preview the first five rows
df.head(5)


┬л 2002 ┬╗(. .),%src_filename
002010443678.7000000011082345210307275.310259178.00000000295.2309420321723894.786561625625552002
10200100158826.9153149.8153149.8150806.510098.469929441631662002
20200103158826.9153149.8153149.8150806.510098.469929441631662002
30200103037147458.5141865141865140224.410098.843548443943192002
4020010303702780921.675448.375448.374517.710098.766572606672372002


1 , pickle, prepack 0.4.2 pkl.gz


# Persist the parsed frame twice: as CSV and through prepack's save helper
df.to_csv('raw_data.csv.gz')
pp.save(df, 'raw_data.pkl.gz')

prepack. DataFrame, .


DataFrame, , , . , 3 iloc=True.


, , " " " ", . .


# Build a row mask from four positional (iloc=True) column conditions and keep the matches.
f = pp.df_filter_and(df, {0: 'istext', 1: 'isnum', 2: 'isblank', 3: 'isblank'}, iloc=True)
ppp = df[f]
# NOTE(review): idx is simply ppp's own index, so the .loc re-selection below
# appears to return the same rows; the column name literal looks like it lost
# its text, so the original intent cannot be confirmed from here.
idx = ppp[' '].index
ppp = ppp.loc[idx,:]

, DataFrame , .


#        ,   
def df_filter(df, fltr):
    """Pop the rows of ``df`` selected by ``fltr`` and return them.

    The row mask comes from ``pp.df_filter_and``. The matching rows are
    returned as a separate frame and removed from ``df`` in place, so each
    call shrinks the caller's frame by exactly the rows it hands back.
    """
    mask = pp.df_filter_and(df, fltr)
    matched = df[mask]
    # In-place drop: rows already claimed cannot be claimed by a later call.
    df.drop(index=matched.index, inplace=True)
    return matched

, DataFrame .


, , . , . label . , ,
, .


def df_filter_post_proc(df, fltr, rzpr, label):
    """Describe one group: pop its rows from ``df`` and summarise them.

    The rows matching ``fltr`` are removed from ``df`` (via ``df_filter``).
    The result is a dict holding the unique values of two columns of those
    rows under 'name' and 'ppp', plus list copies of the ``rzpr`` and
    ``label`` arguments passed through unchanged.
    """
    taken = df_filter(df, fltr)
    return {
        'name': list(taken[' '].unique()),
        'ppp': list(taken['    '].unique()),
        'rzpr': list(rzpr),
        'label': list(label),
    }

, . , , 1 . . df . , , , . . . , : 1 , . . , .


.


#              
def groups_fill(df, groups):
    """Collect, for every group, the rows of ``df`` that belong to it.

    ``groups`` maps a group key to a dict with 'ppp', 'rzpr' and 'label'
    entries (the shape produced by ``df_filter_post_proc``). For each group
    two selections are made from ``df``, rows of the second that duplicate
    the first are removed, and the remainder is appended to the result with
    the group key stored in a new 'name' column. ``df`` is only read.

    Returns the row-wise concatenation over all groups; an empty DataFrame
    when every group is skipped.

    NOTE(review): several column-name literals below consist only of spaces
    or punctuation, and f2/f3/f4 share one key -- this looks like text lost
    in extraction. Confirm against the real column names before running.
    """
    res = pd.DataFrame([])
    for g in groups:
        el = groups[g]

        ppp = el['ppp']
        rzpr = el['rzpr']
        label = el['label']

        # A group without any ppp codes selects nothing -- skip it
        if len(ppp) == 0:
            continue

        # f1: match on the group's ppp codes
        f1 = {'    ': ppp}
        # f2: match on the group's rzpr codes
        f2 = {'  ': rzpr}
        # f3: require a blank value in this column
        f3 = {'  ': 'isblank'}
        # f4: require a blank value in this column
        f4 = {'  ': 'isblank'}

        # One row mask per condition (the results are combined with & and used
        # to index df, so pp.df_filter_or presumably returns boolean masks)
        m1 = pp.df_filter_or(df, f1)
        m2 = pp.df_filter_or(df, f2)
        m3 = pp.df_filter_or(df, f3)
        m4 = pp.df_filter_or(df, f4)
        # First selection: rows satisfying all four conditions
        df1 = df[m1 & m2 & m3 & m4]

        # Second selection: among rows matching the ppp codes and the f4
        # condition, keep those whose value matches one of the group's labels.
        # Copied because a helper column is added to it below.
        df_ = df[m1 & m4]
        f5 = {' ': label}
        m5 = pp.df_filter_or(df_, f5)
        df2 = df_[m5].copy()

        # The two selections can describe the same record twice. A df2 row is
        # treated as a duplicate when its three key columns (two code columns
        # plus src_filename) also occur in df1; such rows are removed from df2
        # so they are not counted twice after concatenation.

        # Remember df2's own index labels so that the merge result can be
        # mapped back to rows of df2
        df2['idx'] = df2.index
        intersect = pd.merge(df1.loc[:,['    ','  ', 'src_filename']],
                             df2.loc[:,['    ','  ', 'src_filename','idx']],
                             on=['    ','  ', 'src_filename'], how='inner')

        # Drop the df2 rows that are already represented in df1
        df2_filtered = df2.drop(intersect['idx'], axis=0)

        # Stack both selections and keep only the columns needed downstream
        df3 = pd.concat([df1, df2_filtered], axis=0)
        v = df3[[' ',
                 '    ',
                 '  ',
                 '(. .) ',
                 'src_filename']].copy()
        # Tag every row with the group key
        v['name'] = g
        # Append this group's rows to the running result
        res = pd.concat([res, v], axis=0)
    return res

.


:


  1. , .
  2. 0101 0107 2002 2019 .
  3. , 0101 0107 . , .


# Group key -> description dict produced by df_filter_post_proc
lst = {}

# rzpr codes shared by most groups
rzpr_base = ['0101','0102','0103','0104','0105','0106','0107']

# Label filters shared by most groups (one literal value plus three 'contains=' patterns)
label_base = ['      ',
              r'contains=.*(?:.*).*',
              r'contains=.*(?:   ).*',
              r'contains=.*(?:  .*).*'
             ]

# First group: the only one registered with its own rzpr list (['isblank'])
# instead of rzpr_base. Each df_filter_post_proc call removes the matched rows
# from ppp, so the order of the group definitions matters.
f = {' ': r'contains=.*(?: ).*'}
rzpr = ['isblank']
df_slice = df_filter_post_proc(ppp, f, rzpr, label_base)
lst[''] = df_slice

# NOTE(review): `rzpr` (rzpr_base plus '0201') is assigned here but the call
# below passes rzpr_base, so the extra '0201' code has no effect. Either the
# assignment is a leftover or `rzpr` was meant to be passed -- confirm.
rzpr = ['0101','0102','0103','0104','0105','0106','0107','0201']
f = {' ': r'contains=.*(?:|| |||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

# NOTE(review): same pattern as above -- `rzpr` is assigned but not used.
rzpr = ['0101','0102','0103','0104','0105','0106','0107','0201']
f = {' ': r'contains=.*(?:|-| ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?:||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?: | ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?: ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?:|||| || ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice 

f = {' ': r'contains=.*(?:|| | |||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice 

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] =df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] =df_slice

f = {' ': r'contains=.*(?:|| |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] =df_slice

f = {' ': r'contains=.*(?:|||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[',   '] =df_slice

f = {' ': r'contains=.*(?:| | |||||| | | | | ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[', , , '] = df_slice

f = {' ': r'contains=.*(?:|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:||||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:||| | |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

# This group needs two labels on top of the shared ones. Pass the extended
# list: the original built `label` but then handed label_base to the call,
# so the two extra labels never took part in the selection.
f = {' ': r'contains=.*(?:| | || | | ).*'}
label = label_base + [' ', ' ']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[',   '] = df_slice

f = {' ': r'contains=.*(?:| | | ).*'}
label = label_base.copy()
label = label + [' ']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[''] = df_slice

f = {' ': r'contains=.*(?:).*'}
label = label_base.copy()
label = label + [r'contains=.* .*']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[' '] = df_slice

f = {' ': r'contains=.*(?:| ).*'}
label = label_base.copy()
label = label + ['- ']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[''] = df_slice

f = {' ': r'contains=.*(?:|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?: |||| |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[', , , '] = df_slice

f = {' ': r'contains=.*(?:| |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:||| | ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:[]|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?: |  ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['    '] = df_slice

f = {' ': r'contains=.*(?: ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

#    
f = {' ': r'contains=.*.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

#   
f = {' ': r'contains=.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

, , .


# Sanity check: for every group print how many distinct names it captured,
# the group key and its ppp codes joined with '|'
for group in lst:
    el = lst[group]
    print(len(el['name']), group, 'ppp:', '|'.join(el['ppp']))
# Rows still left in ppp, i.e. not claimed by any group
print('', len(ppp))

1  ppp: 308
11  ppp: 125|126|177|464|722|184|187|186|721|185
3   ppp: 160|171|388
23    ppp: 083|316|497|204|048|059|060|077|079|081|093|106|141|151|498|587|086|085|087|096
3  ppp: 258|259|730
2   ppp: 330|333
1   ppp: 305
20  ppp: 056|164|166|409|424|425|591|597|057|058|167|175|054
35    ppp: 075|139|190|226|319|386|401|423|486|494|573|589|677|693|073|074|144|385|595|384|007
3  ppp: 181|205|182
3  ppp: 163|166|167
7  ppp: 092|520|720|723|521|724|100
6 ,    ppp: 149|154|159|155|157|172
13 , , ,  ppp: 140|162|165|197|263|161|168|307|142|139|160
1  ppp: 153
6    ppp: 071|156|072|173|321
10    ppp: 020|099|129|306|021|023|101|143|022|725
7  ppp: 029|104|103|107|108|109|110|179
12  ppp: 078|089|128|134|397|201|070|084|088|135|071
0  ppp: 
6  ppp: 206|303|352|588|304
10 ,    ppp: 153|188|415|189|202|192|416|417|180
4  ppp: 434|436|437|438
1   ppp: 322
2  ppp: 318|320
4   ppp: 076|082|080|085
7 , , ,  ppp: 050|158|049|052|053|169|051
6  ppp: 133|279|132|309|360|069
7    ppp: 054|148|387|055|061|064|056|149
2  ppp: 777
1  ppp: 150
2      ppp: 392|393
1  ppp: 310
5   ppp: 022|350|340|370
5   ppp: 174|091|095|260|380
0   ppp: 
3  ppp: 152|302|090
 0

#           ,   
lst[' ']

{'name': ['   ',
  '    ',
  '      , ,   ,     ',
  '       ',
  '    '],
 'ppp': ['174', '091', '095', '260', '380'],
 'rzpr': ['0101', '0102', '0103', '0104', '0105', '0106', '0107'],
 'label': ['      ',
  'contains=.*(?:.*).*',
  'contains=.*(?:   ).*',
  'contains=.*(?:  .*).*']}

, .


# Select the rows of the full frame for every group and tag them with the group key
df2 = groups_fill(df,lst)

# Display the result
df2


(. .)src_filenamename
32203081518661.70000000022002
78963089827426.62003
13252308727006.72004
183723082315399.809872005
242533082450335.730522006
3068130811218927.16572007
406263084199158.04438999952008
521793083063409.770612009
627333083042519.670032010
7295330811343783.966682011
9162230813596252.480872012
1118643083329199.291632013
1310993084392301.79685000052014
1492193083237166.952015
17092530815800336.1600000022016
1950703086073661.122017
22100030818414539.612018
2470263082237048.22019
183753080107709851.85847999992005
242563080107776471.92999999992006
.....................
3035302010557201.72002
7723302010563374.32003
13055302010570769.92004
18205302011596531.578992005
240633020115114431.648982006
2699309001152614.726792007
304793020115137220.767899999982007
35246090011402008
402813020114166654.658542008
518203020114176770.482620000022009
623663020114173045.542852010
725633020113167953.689910000022011
909663020113178229.354869999982012
1112073020113227229.214582013
130549, , ' ' ' '302011342004.5864099999962014
148654, , ' ' ' '302011349886.672015
170236,302011353390.832016
194281,302011356313.8800000000052017
220127,302011372847.510000000012018
246185,302011346021.012019

2970 rows × 6 columns



# Keep only the amount column, the source-file column and the group name
df3 = df2.loc[:,['(. .) ', 'src_filename', 'name']]
# Give the columns short working names
df3.columns = ['value', 'year', 'name']

# Rows whose amount is an empty string
idx = df3[df3.loc[:,'value'] == ''].index

# Replace the empty strings with a zero literal so the cast below can succeed
df3.loc[idx, 'value'] = '0.0'

# Cast the amounts to float64
df3 = df3.astype({'value': 'float64'})

# Sum the amounts per (year, group) and flatten the result back into columns
df4 = df3.groupby(['year','name']).sum().reset_index()

#         
def manuscript(df):
    """Pivot long (name, year, value) records into one row per name.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have the columns 'name', 'year' and 'value'. Its index is not
        used, so it may contain duplicate labels.

    Returns
    -------
    pandas.DataFrame
        Indexed by name, with 'name' as the first column followed by one
        column per year. Rows and year columns appear in order of first
        occurrence in ``df``; cells without a record are NaN, and a repeated
        (name, year) pair keeps the value seen last.
    """
    res = pd.DataFrame([])
    # Walk the three columns in lockstep instead of looking every cell up by
    # index label: df.loc[i, col] returns a Series (not a scalar) as soon as
    # the index holds duplicate labels, which broke the cell assignment.
    for n, y, v in zip(df['name'], df['year'], df['value']):
        # Setting a missing row/column label enlarges the frame as needed.
        res.loc[n, y] = v
    res['name'] = res.index
    # 'name' was appended as the last column; move it to the front.
    last_idx = res.shape[1] - 1
    return res.iloc[:, [last_idx, *range(last_idx)]]

# Pivot the per-(year, group) totals into one row per group and display the table
df5 = manuscript(df4)
df5


name200220032004200520062007200820092010201120122013201420152016201720182019
55118.369206.0174787.23.179523e+054.059359e+056.988135e+058.693810e+051.227829e+061.197151e+061.499655e+061.472361e+061.485361e+061.113370e+068.331200e+051.149470e+061.293278e+061.228704e+067.812578e+05
738567.6937116.81655342.41.473230e+071.436403e+072.029405e+072.347219e+073.066014e+073.164116e+073.242956e+073.368836e+073.491896e+073.741200e+076.415048e+075.411862e+073.814101e+073.722678e+072.967170e+07
199462.9291924.9468451.04.028164e+064.184440e+061.308096e+075.179386e+072.047398e+072.367009e+072.389663e+072.879189e+073.625769e+074.483317e+063.927149e+051.936604e+071.814460e+071.823454e+071.212784e+07
1518661.79827426.6727006.73.025252e+063.226808e+061.215190e+075.344751e+064.289653e+064.252440e+061.255509e+071.480231e+074.778233e+065.448697e+063.489300e+062.632606e+076.440813e+063.324976e+072.636338e+06
6888901.55191349.3496970.05.117778e+057.624081e+059.944042e+051.535074e+062.527582e+062.513591e+063.070057e+063.095424e+063.213713e+062.043384e+061.630418e+061.984821e+062.008460e+062.020177e+066.730164e+05
794096.1943669.81236022.81.143905e+061.359334e+061.644650e+062.372177e+064.427220e+064.299786e+066.742122e+065.628042e+065.303888e+061.711631e+061.495427e+061.285239e+061.296881e+061.219905e+066.484055e+05
604832.7779822.5930441.09.573190e+061.463692e+071.815751e+072.795691e+072.301704e+072.195236e+072.351573e+073.705156e+072.353878e+071.119027e+071.172543e+071.373801e+071.051243e+071.111666e+075.087854e+06
56971.870000.698668.31.181241e+051.383519e+051.828012e+052.497096e+053.089730e+053.540486e+053.443083e+053.396623e+053.611192e+051.789257e+062.145743e+067.194247e+061.714675e+061.333223e+062.213795e+05
244286.1251179.41125325.41.436764e+061.773667e+062.067159e+062.920725e+064.878187e+064.813045e+067.640206e+068.280279e+067.615266e+063.322063e+062.750865e+062.331090e+062.500820e+062.482645e+061.073571e+06
856340.81027204.16176704.01.717242e+072.367843e+072.996330e+073.597592e+073.839959e+073.769431e+073.900493e+073.809575e+073.996433e+071.205323e+071.187536e+078.373596e+068.764745e+069.350644e+065.261310e+06
24294501.929965687.134938483.14.267415e+076.058325e+079.738464e+071.066937e+081.017134e+089.750056e+071.020170e+081.049288e+081.095579e+081.235494e+081.183094e+081.295426e+081.401987e+081.641590e+081.168560e+08
256746.5314463.2320622.23.843661e+054.614256e+055.944249e+054.372921e+066.078403e+065.492050e+066.231623e+066.255479e+066.830796e+064.332645e+067.610444e+067.112033e+065.669845e+066.670268e+062.440637e+06
3982138.75450851.37313133.07.295283e+068.261189e+068.996543e+061.216107e+071.526446e+071.395581e+071.139751e+071.736144e+072.546027e+072.338378e+072.420630e+072.255378e+072.530381e+072.881064e+071.958996e+07
,,29356.9NaNNaN4.086273e+075.955347e+078.714469e+071.112304e+081.190517e+081.188130e+081.355925e+081.447262e+081.695464e+088.024443e+077.469808e+077.097024e+077.359769e+078.050007e+075.685519e+07
374737.2581511.9920630.11.025631e+061.291313e+061.679555e+062.112708e+062.495217e+062.507740e+062.548311e+062.392012e+064.119598e+061.153400e+071.049128e+074.193052e+073.526767e+073.388927e+072.525769e+07
57201.763374.370769.99.653158e+041.144316e+051.398355e+051.666547e+051.767705e+051.730455e+051.679537e+051.782294e+052.272292e+054.200459e+044.988667e+045.339083e+045.631388e+047.284751e+044.602101e+04
102794.5170606.0NaNNaNNaNNaN4.746758e+059.010814e+058.974934e+058.749721e+058.920889e+051.951463e+062.274708e+062.492276e+065.389089e+063.413066e+063.802542e+062.572781e+06
775849.61007700.0243261.24.159233e+054.419006e+055.578016e+057.366907e+051.017874e+061.062586e+061.503976e+061.438637e+072.216447e+063.794126e+064.711565e+068.426905e+064.390055e+063.707722e+061.053583e+06
242701.7216446.7400336.14.992449e+056.603246e+051.115254e+064.911316e+063.458776e+063.272414e+063.271444e+063.727729e+067.070989e+066.109341e+062.636282e+062.762238e+064.253678e+064.969401e+062.624879e+06
,,5428673.43463846.84047470.76.130480e+069.678373e+067.989950e+069.796944e+061.252393e+072.076566e+071.356919e+071.250955e+071.375976e+073.484282e+063.883531e+065.191875e+065.138944e+064.397301e+062.012586e+06
19461752.125119241.433012562.44.236063e+075.965057e+077.893130e+079.276448e+071.073020e+081.090315e+081.170127e+081.206795e+081.317020e+081.437869e+081.518439e+081.671576e+081.726870e+081.770484e+081.262668e+08
514159.2578396.5686709.51.047412e+061.106151e+061.273900e+061.691962e+061.817420e+061.945046e+061.995006e+062.159396e+062.443649e+062.903754e+063.565171e+063.454455e+063.484411e+063.780981e+062.791526e+06
29356.9NaNNaN1.952272e+073.177810e+075.183096e+075.701153e+075.431895e+075.355175e+076.228234e+075.793620e+076.371512e+076.195291e+075.720779e+075.445615e+075.600562e+075.757791e+074.080228e+07
997293.21259272.4526374.26.916618e+059.645825e+051.386756e+061.745424e+062.676544e+062.677625e+062.834927e+063.045155e+063.773597e+062.172722e+062.443524e+062.937038e+062.856930e+063.673195e+062.140081e+06
33821966.645644003.148200940.54.939072e+066.193123e+067.251381e+061.059864e+071.989783e+079.343257e+061.005613e+071.879268e+072.570202e+079.172114e+051.345042e+067.543578e+057.162469e+059.785970e+054.562662e+05
2453820.22629797.73197238.24.181236e+064.659688e+065.362667e+066.895839e+067.361260e+067.344989e+067.554316e+069.326065e+061.053433e+071.131073e+071.242822e+071.333192e+071.376513e+071.475574e+071.075951e+07
9795825.712659262.112562536.21.846512e+072.636404e+073.143353e+073.908707e+074.227111e+074.224021e+074.577294e+074.783098e+074.669822e+074.723468e+074.649914e+075.315978e+075.486682e+075.314100e+073.516734e+07
, , ,, , ,1414479.31648065.4790980.71.769120e+062.288039e+062.148871e+062.325598e+062.668416e+062.607731e+063.113915e+062.991145e+063.765731e+062.026427e+062.593518e+062.144792e+062.022290e+062.086581e+061.043217e+06
, , ,, , ,1097438.71623486.01329962.82.610266e+062.771028e+064.104223e+067.108139e+068.975361e+067.927798e+068.869630e+069.727466e+061.045263e+076.037323e+068.090558e+068.451552e+067.718410e+068.110111e+064.289423e+06
NaN179252.0235819.33.320298e+057.943606e+059.523479e+051.196923e+061.920132e+062.167992e+062.569884e+062.851141e+063.595451e+062.266321e+062.925800e+062.474541e+062.691113e+062.724423e+063.500916e+06
NaNNaN1770603.21.932432e+062.208957e+061.519598e+061.871774e+062.332351e+062.251716e+062.253084e+062.228262e+062.336381e+061.148488e+061.224942e+061.309529e+061.352477e+061.331878e+066.551340e+05
NaNNaNNaN4.959079e+048.257997e+048.007811e+043.208375e+051.905577e+062.141823e+062.734147e+062.566841e+063.105008e+064.561860e+065.291365e+064.921941e+063.635852e+063.703932e+062.351683e+06
NaNNaNNaN2.114784e+072.847243e+074.374566e+075.803671e+076.700059e+076.942675e+077.403237e+078.238733e+078.780665e+075.812453e+075.460078e+075.271739e+075.339494e+075.971298e+074.166289e+07
NaNNaNNaNNaNNaNNaN1.449746e+055.344759e+054.945119e+054.737492e+055.309736e+058.497661e+058.696530e+054.209008e+053.525698e+053.390244e+053.446490e+051.438859e+05


#        d3.js    
def prep_data(df):
    """Expand a wide name-by-year table into interpolated animation frames.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per name. The row must have a 'name' entry; the first column
        is skipped and every following column is read as the numeric value
        for the year given by the column label.

    Returns
    -------
    pandas.DataFrame
        Columns 'name', 'value', 'year', 'lastValue', 'rank'. Each
        (name, year) cell yields ten rows that move linearly from the year's
        value towards the next year's value (the last year stays flat).
        'year' is the label plus the step as a decimal (2002.0 ... 2002.9),
        negative interpolated values are clamped to 0.0, and 'lastValue' is
        the previous step's value. Rows are sorted by ('year', 'value') and
        'rank' is the 1-based position in that overall order. Empty input
        gives an empty frame with the same columns.
    """
    columns = ['name', 'value', 'year', 'lastValue', 'rank']
    records = []
    for _, row in df.iterrows():
        name = row['name']

        # Skip the first column. Use explicit positional access: plain
        # row_[k] is a label lookup when the year labels are integers
        # (KeyError) and a deprecated positional fallback otherwise.
        yearly = row.iloc[1:]
        values = [float(v) for v in yearly]

        for k, (y, begin) in enumerate(zip(yearly.index, values)):
            # The last year has no successor: hold its value flat. This
            # replaces a bare `except:` that was used to detect the end.
            end = values[k + 1] if k + 1 < len(values) else begin
            step = (end - begin) / 10

            cur = begin
            for n in range(10):
                last = cur
                cur = begin + step * (n + 1)
                if cur < 0:
                    cur = 0.0
                records.append({'name': name,
                                'value': round(cur, 2),
                                'year': float(str(y) + '.' + str(n)),
                                'lastValue': round(last, 2),
                                'rank': 0})

    # Naming the columns keeps the sort below valid when there are no records.
    frames = pd.DataFrame(records, columns=columns)
    frames = frames.sort_values(by=['year', 'value'])
    frames.reset_index(drop=True, inplace=True)
    frames['rank'] = range(1, frames.shape[0] + 1)
    return frames

# Build the interpolated frames from the pivoted table
data = prep_data(df5)

data


namevalueyearlastValuerank
05.652707e+042002.05.511830e+041
15.781896e+042002.05.720170e+042
25.827468e+042002.05.697180e+043
31.095756e+052002.01.027945e+054
42.087091e+052002.01.994629e+055
52.400762e+052002.02.427017e+056
62.449754e+052002.02.442861e+057
72.625182e+052002.02.567465e+058
83.954147e+052002.03.747372e+059
95.205829e+052002.05.141592e+0510
106.223317e+052002.06.048327e+0511
117.584225e+052002.07.385676e+0512
127.990346e+052002.07.758496e+0513
138.090535e+052002.07.940961e+0514
148.734271e+052002.08.563408e+0515
151.023491e+062002.09.972932e+0516
16, , ,1.150043e+062002.01.097439e+0617
17, , ,1.437838e+062002.01.414479e+0618
182.349538e+062002.01.518662e+0619
192.471418e+062002.02.453820e+0620
..................
61002.440637e+062019.92.440637e+066101
61012.572781e+062019.92.572781e+066102
61022.624879e+062019.92.624879e+066103
61032.636338e+062019.92.636338e+066104
61042.791526e+062019.92.791526e+066105
61053.500916e+062019.93.500916e+066106
6106, , ,4.289423e+062019.94.289423e+066107
61075.087854e+062019.95.087854e+066108
61085.261310e+062019.95.261310e+066109
61091.075951e+072019.91.075951e+076110
61101.212784e+072019.91.212784e+076111
61111.958996e+072019.91.958996e+076112
61122.525769e+072019.92.525769e+076113
61132.967170e+072019.92.967170e+076114
61143.516734e+072019.93.516734e+076115
61154.080228e+072019.94.080228e+076116
61164.166289e+072019.94.166289e+076117
6117,5.685519e+072019.95.685519e+076118
61181.168560e+082019.91.168560e+086119
61191.262668e+082019.91.262668e+086120

6120 पंक्तियों × 5 कॉलम



# Write the frames to CSV without the index column; the article pastes this file into the d3.js page
data.to_csv('data_groups.csv', index=False)

इसके बाद, आपको परिणामी csv फ़ाइल की सामग्री को https://github.com/legale/d3.js-portable/blob/master/barchart-race-ppp-bundle2.html में csv_string चर में लिखना होगा।


हम प्राप्त दौड़ को देखते हैं। दौड़ के परिणाम खुद बोलते हैं।


यहाँ html में:
https://vneberu.ru/barchart-race2.html


यहाँ एक वीडियो यूट्यूब पर है:



आपके ध्यान के लिए धन्यवाद, टिप्पणियों में आपका स्वागत है।


लिंक


आप यहां prepack लाइब्रेरी देख सकते हैं: https://github.com/legale/prepack
दौड़ के साथ एकत्रित बंडलों को ऊपर दिए गए लिंक से डाउनलोड किया जा सकता है, या फिर यहां से।


All Articles