谁应该在预算上过得好?


介绍


   — ,
   — ,
  
  :
 ,
 ,
 ,
 ,
  :
, ,
, .
,  —
 ,
 —  :
  ,
  ?

.

几个月前,在一次IT活动上,我碰巧看到了Pandas的实际运用。使用它的那个人并没有做什么特别惊人的事情。但是,对数值的简单求和、平均值的计算以及分组都完成得如此巧妙,以至于即使我对Python抱有偏见,也被迷住了。当时处理的是一个规模相当可观的数据集,内容是2004年至2019年期间的房屋大修数据。虽然有数十万行,但一切都运行得非常快。


- , Pandas. , Excel . .


. , , . , , . , , , , . - "barchart race" - " ". , barchart race. , . Barchart race , , , - , . , .



"" , , . , , . , , . . , , - . :


  1. , .
  2. , , - . .

:


. .



: " ". . . - . .


— 20- , , . , - , (, ), , , , .


- , . , 7 , 3 — ( ) 4, 5 — , 6, 7 — . , , , .


:








, , .


, . , . . . , , . 3 10 , . . . . 164. .


.


. 0507011, 2002 2019. - 2007 .


, , 2002 2019 .


, 0501 2003 " ", 2006 " ". 01 . 0103 2002 " ", 2005 " () ".


, . , , . 02 , 03 . . , .


. , , 1986 A Manual on Government Finance Statistics (GFSM 1986). https://www.imf.org/external/pubs/ft/gfs/manual/1986/eng/index.htm
, : 2001 2014 . GFSM 2014 .


4 "Functional classification", . Classification of the Functions of Government COFOG. - (). , . 4 : 2 — , 3 4 . , . 2002 . , , .


, .
COFOG, COFOG.


, , COFOG , . . , . . , , , " " , 01 ( ) . , , . , . . - . .


: " , -!"


. -


. , outer join . :



. , . . , . 2002 2019 240 , , . 95 . 2002 240, 17 8 . .


:



html :
https://vneberu.ru/


, , . . .


. ?


1 . 01 , . 0101.


17 , . , 01 , .. " " 01. , 01, . , GFSM 2014, - . , 70310 , , , , , ;


01 , - . . - . - . Pandas, . levenshtein_merge(). join Pandas DataFrame , . , . , , , , pip install Pandas DataFrame. , prepack.


2 , jupyter notebook .


#    
import pandas as pd
import numpy as np
import os, sys

#    
from prepack import prepack as pp

#      
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 200
pd.options.display.max_colwidth = 500
pd.options.display.min_rows = 40

prepack. zip . file-like ,
. pd.read_excel(), excel.


names, files = pp.read_zip("raw_data.zip") 

pd.read_excel() , excel .


read_excel excel , .


, . . , .


pp.read_excel(files[0]).iloc[8:13,:]


012345678910111213141516171819202122232425262728
8(. .)(. .)
9\n« 2002 »,%
10
1112345678891010
1202010443678.7000000011082345210307275.310259178.00000000295.2309420321723894.78656162562555


, . . excel, 1 DataFrame, , .


3 : files, columns, fltr. excel , — , . — , , . 4 . .


, , 0 11 , , , . , . 3 :
8,9,10


#      1  DataFrame
df = pp.parse_excels(files, columns=[0,12], fltr={0: 'istext',1: 'isnum'}, header=[8,9,10])

#,  
df.head(5)


« 2002 »(. .),%src_filename
002010443678.7000000011082345210307275.310259178.00000000295.2309420321723894.786561625625552002
10200100158826.9153149.8153149.8150806.510098.469929441631662002
20200103158826.9153149.8153149.8150806.510098.469929441631662002
30200103037147458.5141865141865140224.410098.843548443943192002
4020010303702780921.675448.375448.374517.710098.766572606672372002


1 , pickle, prepack 0.4.2 pkl.gz


#   
df.to_csv('raw_data.csv.gz')
pp.save(df, 'raw_data.pkl.gz')

prepack. DataFrame, .


DataFrame, , , . , 3 iloc=True.


, , " " " ", . .


#         
f = pp.df_filter_and(df, {0: 'istext', 1: 'isnum', 2: 'isblank', 3: 'isblank'}, iloc=True)
ppp = df[f]
idx = ppp[' '].index
ppp = ppp.loc[idx,:]

, DataFrame , .


#        ,   
def df_filter(df, fltr):
    """Cut the rows selected by ``fltr`` out of ``df`` and return them.

    The row mask is whatever ``pp.df_filter_and(df, fltr)`` returns. The
    selected rows are returned to the caller, and rows with the same index
    labels are dropped from ``df`` in place, so every call shrinks the frame
    that was passed in.
    """
    mask = pp.df_filter_and(df, fltr)
    matched = df[mask]
    # In-place drop by index label: the caller's frame loses the matched rows.
    df.drop(index=matched.index, inplace=True)
    return matched

, DataFrame .


, , . , . label . , ,
, .


def df_filter_post_proc(df, fltr, rzpr, label):
    """Remove the rows matching ``fltr`` from ``df`` and describe them as a group.

    The rows are extracted with ``df_filter`` (which also drops them from
    ``df``). The result is a dict with four list values:

    * ``'name'``  - unique values of one column of the extracted rows,
    * ``'ppp'``   - unique values of another column of the extracted rows,
    * ``'rzpr'``  - a list copy of the ``rzpr`` argument,
    * ``'label'`` - a list copy of the ``label`` argument.
    """
    extracted = df_filter(df, fltr)
    # NOTE(review): the two column labels below look like they lost their
    # original text during extraction - confirm against the real frame.
    unique_codes = extracted['    '].unique()
    unique_names = extracted[' '].unique()
    summary = {}
    summary['name'] = list(unique_names)
    summary['ppp'] = list(unique_codes)
    summary['rzpr'] = list(rzpr)
    summary['label'] = list(label)
    return summary

, . , , 1 . . df . , , , . . . , : 1 , . . , .


.


#              
def groups_fill(df, groups):
    """Collect, for every group in ``groups``, the rows of ``df`` that belong to it.

    ``groups`` maps a group name to a dict with the keys ``'ppp'``, ``'rzpr'``
    and ``'label'``. For each group two row sets are built from ``df`` and
    stacked: rows selected through the ``ppp``/``rzpr`` filters, and rows
    selected through the ``ppp``/``label`` filters that do not duplicate the
    first set. Groups whose ``'ppp'`` list is empty are skipped.

    Returns a DataFrame holding five columns taken from ``df`` plus a
    ``'name'`` column set to the group name, with all groups concatenated.
    """
    res = pd.DataFrame([])
    for g in groups:
        el = groups[g]

        ppp = el['ppp']
        rzpr = el['rzpr']
        label = el['label']

        # A group without any ppp codes has nothing to select - skip it.
        if len(ppp) == 0:
            continue

        # Filter 1: restrict one column to the group's ppp values.
        f1 = {'    ': ppp}
        # Filter 2: restrict a column to the group's rzpr values.
        f2 = {'  ': rzpr}
        # Filters 3 and 4: require a column to be blank ('isblank').
        # NOTE(review): f2, f3 and f4 appear to use the same key literal here;
        # the distinct column names were probably lost in extraction - confirm.
        f3 = {'  ': 'isblank'}
        f4 = {'  ': 'isblank'}

        # Evaluate each filter separately; the results are combined with & and
        # used to index df below.
        m1 = pp.df_filter_or(df, f1)
        m2 = pp.df_filter_or(df, f2)
        m3 = pp.df_filter_or(df, f3)
        m4 = pp.df_filter_or(df, f4)
        # First row set: rows that satisfy all four filters.
        df1 = df[m1 & m2 & m3 & m4]

        # Second row set: start from the rows passing filters 1 and 4 only,
        # then keep those that match one of the group's labels.
        df_ = df[m1 & m4]
        f5 = {' ': label}
        m5 = pp.df_filter_or(df_, f5)
        df2 = df_[m5].copy()

        # The two sets can contain rows sharing the same three key columns
        # (the two columns used in the merge below plus 'src_filename').
        # Such rows must appear only once, so the df2 rows that have a
        # counterpart in df1 are removed before the sets are stacked.

        # Remember df2's own index labels in a column so they survive the
        # merge, then inner-join df1 and df2 on the three key columns.
        df2['idx'] = df2.index
        intersect = pd.merge(df1.loc[:,['    ','  ', 'src_filename']],
                             df2.loc[:,['    ','  ', 'src_filename','idx']],
                             on=['    ','  ', 'src_filename'], how='inner')

        # Drop from df2 every row that the join found in df1 as well.
        df2_filtered = df2.drop(intersect['idx'], axis=0)

        # Stack both sets and keep only the columns needed downstream.
        df3 = pd.concat([df1, df2_filtered], axis=0)
        v = df3[[' ',
                 '    ',
                 '  ',
                 '(. .) ',
                 'src_filename']].copy()
        # Tag every selected row with the group name.
        v['name'] = g
        # Append this group's rows to the accumulated result.
        res = pd.concat([res, v], axis=0)
    return res

.


:


  1. , .
  2. 0101 0107 2002 2019 .
  3. , 0101 0107 . , .


lst = {}

#        
rzpr_base = ['0101','0102','0103','0104','0105','0106','0107']

#      
label_base = ['      ',
              r'contains=.*(?:.*).*',
              r'contains=.*(?:   ).*',
              r'contains=.*(?:  .*).*'
             ]

#           
f = {' ': r'contains=.*(?: ).*'}
rzpr = ['isblank']
df_slice = df_filter_post_proc(ppp, f, rzpr, label_base)
lst[''] = df_slice

# NOTE(review): rzpr is rebuilt here with an extra '0201' entry, but the call
# below passes rzpr_base, so this list is never used - confirm which one was
# intended.
rzpr = ['0101','0102','0103','0104','0105','0106','0107','0201']
f = {' ': r'contains=.*(?:|| |||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

# NOTE(review): same pattern as above - rzpr is assigned but rzpr_base is
# what the call receives.
rzpr = ['0101','0102','0103','0104','0105','0106','0107','0201']
f = {' ': r'contains=.*(?:|-| ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?:||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?: | ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?: ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?:|||| || ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice 

f = {' ': r'contains=.*(?:|| | |||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice 

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] =df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] =df_slice

f = {' ': r'contains=.*(?:|| |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] =df_slice

f = {' ': r'contains=.*(?:|||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[',   '] =df_slice

f = {' ': r'contains=.*(?:| | |||||| | | | | ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[', , , '] = df_slice

f = {' ': r'contains=.*(?:|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:||||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:||).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:||| | |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

# This group needs two labels on top of the shared defaults. Pass the extended
# list to df_filter_post_proc: the previous code built it but then handed over
# label_base, so the extra labels were silently ignored.
f = {' ': r'contains=.*(?:| | || | | ).*'}
label = label_base + [' ',' ']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[',   '] = df_slice

f = {' ': r'contains=.*(?:| | | ).*'}
label = label_base.copy()
label = label + [' ']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[''] = df_slice

f = {' ': r'contains=.*(?:).*'}
label = label_base.copy()
label = label + [r'contains=.* .*']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[' '] = df_slice

f = {' ': r'contains=.*(?:| ).*'}
label = label_base.copy()
label = label + ['- ']
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label)
lst[''] = df_slice

f = {' ': r'contains=.*(?:|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*(?: |||| |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[', , , '] = df_slice

f = {' ': r'contains=.*(?:| |).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:||| | ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['  '] = df_slice

f = {' ': r'contains=.*(?:).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?:[]|).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

f = {' ': r'contains=.*(?: |  ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst['    '] = df_slice

f = {' ': r'contains=.*(?: ).*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

#    
f = {' ': r'contains=.*.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

f = {' ': r'contains=.*.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[' '] = df_slice

#   
f = {' ': r'contains=.*'}
df_slice = df_filter_post_proc(ppp, f, rzpr_base, label_base)
lst[''] = df_slice

, , .


# ,      
for group in lst:
    el = lst[group]
    print(len(el['name']), group, 'ppp:', '|'.join(el['ppp']))
print('', len(ppp))

1  ppp: 308
11  ppp: 125|126|177|464|722|184|187|186|721|185
3   ppp: 160|171|388
23    ppp: 083|316|497|204|048|059|060|077|079|081|093|106|141|151|498|587|086|085|087|096
3  ppp: 258|259|730
2   ppp: 330|333
1   ppp: 305
20  ppp: 056|164|166|409|424|425|591|597|057|058|167|175|054
35    ppp: 075|139|190|226|319|386|401|423|486|494|573|589|677|693|073|074|144|385|595|384|007
3  ppp: 181|205|182
3  ppp: 163|166|167
7  ppp: 092|520|720|723|521|724|100
6 ,    ppp: 149|154|159|155|157|172
13 , , ,  ppp: 140|162|165|197|263|161|168|307|142|139|160
1  ppp: 153
6    ppp: 071|156|072|173|321
10    ppp: 020|099|129|306|021|023|101|143|022|725
7  ppp: 029|104|103|107|108|109|110|179
12  ppp: 078|089|128|134|397|201|070|084|088|135|071
0  ppp: 
6  ppp: 206|303|352|588|304
10 ,    ppp: 153|188|415|189|202|192|416|417|180
4  ppp: 434|436|437|438
1   ppp: 322
2  ppp: 318|320
4   ppp: 076|082|080|085
7 , , ,  ppp: 050|158|049|052|053|169|051
6  ppp: 133|279|132|309|360|069
7    ppp: 054|148|387|055|061|064|056|149
2  ppp: 777
1  ppp: 150
2      ppp: 392|393
1  ppp: 310
5   ppp: 022|350|340|370
5   ppp: 174|091|095|260|380
0   ppp: 
3  ppp: 152|302|090
 0

#           ,   
lst[' ']

{'name': ['   ',
  '    ',
  '      , ,   ,     ',
  '       ',
  '    '],
 'ppp': ['174', '091', '095', '260', '380'],
 'rzpr': ['0101', '0102', '0103', '0104', '0105', '0106', '0107'],
 'label': ['      ',
  'contains=.*(?:.*).*',
  'contains=.*(?:   ).*',
  'contains=.*(?:  .*).*']}

, .


#   ,             
df2 = groups_fill(df,lst)

# ,  
df2


(. .)src_filenamename
32203081518661.70000000022002
78963089827426.62003
13252308727006.72004
183723082315399.809872005
242533082450335.730522006
3068130811218927.16572007
406263084199158.04438999952008
521793083063409.770612009
627333083042519.670032010
7295330811343783.966682011
9162230813596252.480872012
1118643083329199.291632013
1310993084392301.79685000052014
1492193083237166.952015
17092530815800336.1600000022016
1950703086073661.122017
22100030818414539.612018
2470263082237048.22019
183753080107709851.85847999992005
242563080107776471.92999999992006
.....................
3035302010557201.72002
7723302010563374.32003
13055302010570769.92004
18205302011596531.578992005
240633020115114431.648982006
2699309001152614.726792007
304793020115137220.767899999982007
35246090011402008
402813020114166654.658542008
518203020114176770.482620000022009
623663020114173045.542852010
725633020113167953.689910000022011
909663020113178229.354869999982012
1112073020113227229.214582013
130549, , ' ' ' '302011342004.5864099999962014
148654, , ' ' ' '302011349886.672015
170236,302011353390.832016
194281,302011356313.8800000000052017
220127,302011372847.510000000012018
246185,302011346021.012019

2970 rows × 6 columns



#         
# Keep only the three columns needed for the chart.
df3 = df2.loc[:,['(. .) ', 'src_filename', 'name']]
# Give them short working names: value, year, name.
df3.columns = ['value', 'year', 'name']

# Locate the rows whose value is an empty string.
idx = df3[df3.loc[:,'value'] == ''].index

# Replace those empty strings with '0.0' so the float conversion below succeeds.
df3.loc[idx, 'value'] = '0.0'

# Convert the value column to float64.
df3 = df3.astype({'value': 'float64'})

# Sum the values per (year, name) pair and turn the group keys back into columns.
df4 = df3.groupby(['year','name']).sum().reset_index()

#         
def manuscript(df):
    """Reshape a long ``name``/``year``/``value`` table into a wide one.

    Every row of ``df`` contributes one cell: the row label is its ``name``,
    the column label is its ``year`` and the cell holds its ``value``. Rows and
    columns appear in order of first occurrence; cells never written stay NaN.
    The returned frame has a leading ``'name'`` column (a copy of the index)
    followed by one column per year.
    """
    wide = pd.DataFrame([])
    # Fill cell by cell; unseen row/column labels are created on first write.
    for label in df.index:
        who, when, amount = (df.loc[label, 'name'],
                             df.loc[label, 'year'],
                             df.loc[label, 'value'])
        wide.loc[who, when] = amount
    wide['name'] = wide.index
    # 'name' was appended as the last column; rotate it to the first position
    # while leaving the year columns in their existing order.
    tail = wide.shape[1] - 1
    positions = [tail, *range(tail)]
    return wide.iloc[:, positions]

#       
df5 = manuscript(df4)
df5


name200220032004200520062007200820092010201120122013201420152016201720182019
55118.369206.0174787.23.179523e+054.059359e+056.988135e+058.693810e+051.227829e+061.197151e+061.499655e+061.472361e+061.485361e+061.113370e+068.331200e+051.149470e+061.293278e+061.228704e+067.812578e+05
738567.6937116.81655342.41.473230e+071.436403e+072.029405e+072.347219e+073.066014e+073.164116e+073.242956e+073.368836e+073.491896e+073.741200e+076.415048e+075.411862e+073.814101e+073.722678e+072.967170e+07
199462.9291924.9468451.04.028164e+064.184440e+061.308096e+075.179386e+072.047398e+072.367009e+072.389663e+072.879189e+073.625769e+074.483317e+063.927149e+051.936604e+071.814460e+071.823454e+071.212784e+07
1518661.79827426.6727006.73.025252e+063.226808e+061.215190e+075.344751e+064.289653e+064.252440e+061.255509e+071.480231e+074.778233e+065.448697e+063.489300e+062.632606e+076.440813e+063.324976e+072.636338e+06
6888901.55191349.3496970.05.117778e+057.624081e+059.944042e+051.535074e+062.527582e+062.513591e+063.070057e+063.095424e+063.213713e+062.043384e+061.630418e+061.984821e+062.008460e+062.020177e+066.730164e+05
794096.1943669.81236022.81.143905e+061.359334e+061.644650e+062.372177e+064.427220e+064.299786e+066.742122e+065.628042e+065.303888e+061.711631e+061.495427e+061.285239e+061.296881e+061.219905e+066.484055e+05
604832.7779822.5930441.09.573190e+061.463692e+071.815751e+072.795691e+072.301704e+072.195236e+072.351573e+073.705156e+072.353878e+071.119027e+071.172543e+071.373801e+071.051243e+071.111666e+075.087854e+06
56971.870000.698668.31.181241e+051.383519e+051.828012e+052.497096e+053.089730e+053.540486e+053.443083e+053.396623e+053.611192e+051.789257e+062.145743e+067.194247e+061.714675e+061.333223e+062.213795e+05
244286.1251179.41125325.41.436764e+061.773667e+062.067159e+062.920725e+064.878187e+064.813045e+067.640206e+068.280279e+067.615266e+063.322063e+062.750865e+062.331090e+062.500820e+062.482645e+061.073571e+06
856340.81027204.16176704.01.717242e+072.367843e+072.996330e+073.597592e+073.839959e+073.769431e+073.900493e+073.809575e+073.996433e+071.205323e+071.187536e+078.373596e+068.764745e+069.350644e+065.261310e+06
24294501.929965687.134938483.14.267415e+076.058325e+079.738464e+071.066937e+081.017134e+089.750056e+071.020170e+081.049288e+081.095579e+081.235494e+081.183094e+081.295426e+081.401987e+081.641590e+081.168560e+08
256746.5314463.2320622.23.843661e+054.614256e+055.944249e+054.372921e+066.078403e+065.492050e+066.231623e+066.255479e+066.830796e+064.332645e+067.610444e+067.112033e+065.669845e+066.670268e+062.440637e+06
3982138.75450851.37313133.07.295283e+068.261189e+068.996543e+061.216107e+071.526446e+071.395581e+071.139751e+071.736144e+072.546027e+072.338378e+072.420630e+072.255378e+072.530381e+072.881064e+071.958996e+07
,,29356.9NaNNaN4.086273e+075.955347e+078.714469e+071.112304e+081.190517e+081.188130e+081.355925e+081.447262e+081.695464e+088.024443e+077.469808e+077.097024e+077.359769e+078.050007e+075.685519e+07
374737.2581511.9920630.11.025631e+061.291313e+061.679555e+062.112708e+062.495217e+062.507740e+062.548311e+062.392012e+064.119598e+061.153400e+071.049128e+074.193052e+073.526767e+073.388927e+072.525769e+07
57201.763374.370769.99.653158e+041.144316e+051.398355e+051.666547e+051.767705e+051.730455e+051.679537e+051.782294e+052.272292e+054.200459e+044.988667e+045.339083e+045.631388e+047.284751e+044.602101e+04
102794.5170606.0NaNNaNNaNNaN4.746758e+059.010814e+058.974934e+058.749721e+058.920889e+051.951463e+062.274708e+062.492276e+065.389089e+063.413066e+063.802542e+062.572781e+06
775849.61007700.0243261.24.159233e+054.419006e+055.578016e+057.366907e+051.017874e+061.062586e+061.503976e+061.438637e+072.216447e+063.794126e+064.711565e+068.426905e+064.390055e+063.707722e+061.053583e+06
242701.7216446.7400336.14.992449e+056.603246e+051.115254e+064.911316e+063.458776e+063.272414e+063.271444e+063.727729e+067.070989e+066.109341e+062.636282e+062.762238e+064.253678e+064.969401e+062.624879e+06
,,5428673.43463846.84047470.76.130480e+069.678373e+067.989950e+069.796944e+061.252393e+072.076566e+071.356919e+071.250955e+071.375976e+073.484282e+063.883531e+065.191875e+065.138944e+064.397301e+062.012586e+06
19461752.125119241.433012562.44.236063e+075.965057e+077.893130e+079.276448e+071.073020e+081.090315e+081.170127e+081.206795e+081.317020e+081.437869e+081.518439e+081.671576e+081.726870e+081.770484e+081.262668e+08
514159.2578396.5686709.51.047412e+061.106151e+061.273900e+061.691962e+061.817420e+061.945046e+061.995006e+062.159396e+062.443649e+062.903754e+063.565171e+063.454455e+063.484411e+063.780981e+062.791526e+06
29356.9NaNNaN1.952272e+073.177810e+075.183096e+075.701153e+075.431895e+075.355175e+076.228234e+075.793620e+076.371512e+076.195291e+075.720779e+075.445615e+075.600562e+075.757791e+074.080228e+07
997293.21259272.4526374.26.916618e+059.645825e+051.386756e+061.745424e+062.676544e+062.677625e+062.834927e+063.045155e+063.773597e+062.172722e+062.443524e+062.937038e+062.856930e+063.673195e+062.140081e+06
33821966.645644003.148200940.54.939072e+066.193123e+067.251381e+061.059864e+071.989783e+079.343257e+061.005613e+071.879268e+072.570202e+079.172114e+051.345042e+067.543578e+057.162469e+059.785970e+054.562662e+05
2453820.22629797.73197238.24.181236e+064.659688e+065.362667e+066.895839e+067.361260e+067.344989e+067.554316e+069.326065e+061.053433e+071.131073e+071.242822e+071.333192e+071.376513e+071.475574e+071.075951e+07
9795825.712659262.112562536.21.846512e+072.636404e+073.143353e+073.908707e+074.227111e+074.224021e+074.577294e+074.783098e+074.669822e+074.723468e+074.649914e+075.315978e+075.486682e+075.314100e+073.516734e+07
, , ,, , ,1414479.31648065.4790980.71.769120e+062.288039e+062.148871e+062.325598e+062.668416e+062.607731e+063.113915e+062.991145e+063.765731e+062.026427e+062.593518e+062.144792e+062.022290e+062.086581e+061.043217e+06
, , ,, , ,1097438.71623486.01329962.82.610266e+062.771028e+064.104223e+067.108139e+068.975361e+067.927798e+068.869630e+069.727466e+061.045263e+076.037323e+068.090558e+068.451552e+067.718410e+068.110111e+064.289423e+06
NaN179252.0235819.33.320298e+057.943606e+059.523479e+051.196923e+061.920132e+062.167992e+062.569884e+062.851141e+063.595451e+062.266321e+062.925800e+062.474541e+062.691113e+062.724423e+063.500916e+06
NaNNaN1770603.21.932432e+062.208957e+061.519598e+061.871774e+062.332351e+062.251716e+062.253084e+062.228262e+062.336381e+061.148488e+061.224942e+061.309529e+061.352477e+061.331878e+066.551340e+05
NaNNaNNaN4.959079e+048.257997e+048.007811e+043.208375e+051.905577e+062.141823e+062.734147e+062.566841e+063.105008e+064.561860e+065.291365e+064.921941e+063.635852e+063.703932e+062.351683e+06
NaNNaNNaN2.114784e+072.847243e+074.374566e+075.803671e+076.700059e+076.942675e+077.403237e+078.238733e+078.780665e+075.812453e+075.460078e+075.271739e+075.339494e+075.971298e+074.166289e+07
NaNNaNNaNNaNNaNNaN1.449746e+055.344759e+054.945119e+054.737492e+055.309736e+058.497661e+058.696530e+054.209008e+053.525698e+053.390244e+053.446490e+051.438859e+05


#        d3.js    
def prep_data(df):
    """Expand a wide per-year table into interpolated frames for the bar chart race.

    ``df`` is expected to have a ``'name'`` column in first position followed
    by one numeric column per year. For every name and every year, ten records
    are produced that move linearly from that year's value to the next year's
    value (the last year stays flat because it has no successor).

    Each record has:

    * ``name``      - the row's name,
    * ``value``     - the interpolated value, clamped at 0 and rounded to 2 digits,
    * ``year``      - ``float("<year>.<step>")`` with step 0..9,
    * ``lastValue`` - the value of the previous step (the year's own value for step 0),
    * ``rank``      - 1-based position after sorting all records by ``year`` then ``value``.

    Returns the records as a DataFrame sorted by ``year`` and ``value``; an
    input without rows yields an empty frame with the same columns.
    """
    steps = 10
    columns = ['name', 'value', 'year', 'lastValue', 'rank']
    records = []
    for _, row in df.iterrows():
        name = row['name']

        # Skip the first column (the name). Positional access via .iloc is
        # required: plain row[k] is a label lookup and fails when the year
        # labels are integers.
        yearly = row.iloc[1:]
        values = [float(v) for v in yearly]

        for k, (y, begin) in enumerate(zip(yearly.index, values)):
            # The last year has no successor, so it interpolates to itself.
            end = values[k + 1] if k + 1 < len(values) else begin
            step = (end - begin) / steps

            cur = begin
            for n in range(steps):
                last = cur
                cur = begin + step * (n + 1)
                # Negative values are clamped to zero.
                if cur < 0:
                    cur = 0.0
                records.append({'name': name,
                                'value': round(cur, 2),
                                'year': float(f'{y}.{n}'),
                                'lastValue': round(last, 2),
                                'rank': 0})

    # Naming the columns explicitly keeps the sort below valid for empty input.
    df2 = pd.DataFrame(records, columns=columns)
    df2 = df2.sort_values(by=['year', 'value'])
    df2.reset_index(drop=True, inplace=True)
    df2['rank'] = range(1, df2.shape[0] + 1)
    return df2

#  
data = prep_data(df5)

data


namevalueyearlastValuerank
05.652707e+042002.05.511830e+041
15.781896e+042002.05.720170e+042
25.827468e+042002.05.697180e+043
31.095756e+052002.01.027945e+054
42.087091e+052002.01.994629e+055
52.400762e+052002.02.427017e+056
62.449754e+052002.02.442861e+057
72.625182e+052002.02.567465e+058
83.954147e+052002.03.747372e+059
95.205829e+052002.05.141592e+0510
106.223317e+052002.06.048327e+0511
117.584225e+052002.07.385676e+0512
127.990346e+052002.07.758496e+0513
138.090535e+052002.07.940961e+0514
148.734271e+052002.08.563408e+0515
151.023491e+062002.09.972932e+0516
16, , ,1.150043e+062002.01.097439e+0617
17, , ,1.437838e+062002.01.414479e+0618
182.349538e+062002.01.518662e+0619
192.471418e+062002.02.453820e+0620
..................
61002.440637e+062019.92.440637e+066101
61012.572781e+062019.92.572781e+066102
61022.624879e+062019.92.624879e+066103
61032.636338e+062019.92.636338e+066104
61042.791526e+062019.92.791526e+066105
61053.500916e+062019.93.500916e+066106
6106, , ,4.289423e+062019.94.289423e+066107
61075.087854e+062019.95.087854e+066108
61085.261310e+062019.95.261310e+066109
61091.075951e+072019.91.075951e+076110
61101.212784e+072019.91.212784e+076111
61111.958996e+072019.91.958996e+076112
61122.525769e+072019.92.525769e+076113
61132.967170e+072019.92.967170e+076114
61143.516734e+072019.93.516734e+076115
61154.080228e+072019.94.080228e+076116
61164.166289e+072019.94.166289e+076117
6117,5.685519e+072019.95.685519e+076118
61181.168560e+082019.91.168560e+086119
61191.262668e+082019.91.262668e+086120

6120行×5列



#    csv    d3.js
data.to_csv('data_groups.csv', index=False)

接下来,您需要将生成的csv文件的内容写入文件 https://github.com/legale/d3.js-portable/blob/master/barchart-race-ppp-bundle2.html 中的变量 csv_string。


我们看一下收到的比赛。比赛结果不言而喻。


HTML版本在这里:https://vneberu.ru/barchart-race2.html


这是youtube上的视频:



感谢您的关注,欢迎发表评论。


链接


可以在以下位置查看prepack库:https://github.com/legale/prepack
打包好的条形图竞赛(barchart race)文件可以从上面的链接下载,也可以在此处下载


All Articles