Ville et données: analyse de la distance de marche des objets à Prague à l'aide de la science des données


Veeam R&D . 40 , , , Rustonka . Veeam , . , , , : , , , , . , , - — .


— , , , :


10 16 ?


1300 . , , .
, , , . , , , .



— , . 14 . 298 . . 1,3 .


14 . , , , . — , . , , , , . — , , .


. , , , . , , 100 , . , . IPR Praha : , , , , .. , .



. , . — , — . , 22 .


# Download the population table from czso.cz and reduce it to the rows whose
# name mentions "Praha", keeping the name plus four count columns.
url_population = 'https://www.czso.cz/documents/10180/25233177/sldb_zv.csv'
df_population = pd.read_csv(url_population, encoding="ISO 8859-2")

# Source column -> readable column name used by the rest of the analysis.
column_names = {'nazev': 'Name', 'u01': 'Total', 'u04': 'Kids', 'u05': 'Middle', 'u06': 'Senior'}

# NOTE(review): uzcis == 44 presumably selects the city-district level of the
# territorial classification — confirm against the CZSO code list.
is_unit_44 = df_population['uzcis'] == 44
mentions_prague = df_population['nazev'].str.find('Praha') != -1

df_population = df_population.loc[is_unit_44 & mentions_prague, list(column_names)]
df_population = df_population.rename(columns=column_names)
# Lower-case the names so they can be matched against other sources.
df_population['Name'] = df_population['Name'].map(lambda name: name.lower())

| typuz_naz | nazev | uzcis | uzkod | u01 |
|---|---|---|---|---|
| kraj | Hlavní město Praha | 100 | 3018 | 1268796 |

. IPR Praha.


, GitHub repository


|   | Name | Geometry | Area | Total | Kids |
|---|---|---|---|---|---|
| 0 | praha 1 | [[14.410891049000043, 50.078674687000046], [14... | 5538443.86 | 30561.0 | 2391.0 |
| 1 | praha 10 | [[14.531321086000048, 50.072240288000046], [14... | 18599366.98 | 113200.0 | 12213.0 |
| 2 | praha 11 | [[14.54355294800007, 50.03618763800006], [14.5... | 9793679.84 | 75741.0 | 8688.0 |

?


: , , , . , : , , . - API, -, - , - .


, . XML , . 2273 . , , , , .
, 500 . , : , .


, geo-API .


# Coordinates retrieve function
import geocoder

def get_coordinates(dataFrame, index_row, geocode=None):
    """Geocode one column of ``dataFrame`` and store the result in place.

    Every value of the ``index_row`` column is passed to ``geocode``; the
    ``lat`` and ``lng`` entries of the returned object's ``json`` attribute
    are written to new ``latitude`` and ``longitude`` columns.  Rows that
    cannot be geocoded keep the placeholder value ``0.0`` in both columns.

    Args:
        dataFrame: pandas DataFrame to update in place.
        index_row: Name of the column that holds the text to geocode.
        geocode: Optional callable taking the text and returning an object
            with a ``json`` mapping.  Defaults to ``geocoder.arcgis``.

    Returns:
        None.  A summary line with total / success / error counts is printed.
    """
    if geocode is None:
        # Resolved lazily so the default provider is only needed when used.
        geocode = geocoder.arcgis

    dict_coordinates = {}
    total_count = len(dataFrame.index)
    current = 0
    errors = 0
    for index, row in dataFrame.iterrows():
        try:
            payload = geocode(row[index_row]).json
            dict_coordinates[index] = (payload['lat'], payload['lng'])
            current += 1
        except Exception as exc:
            # Best effort: one failed lookup must not stop the whole run, but
            # Ctrl-C and interpreter exit are no longer swallowed.
            errors += 1
            print('Failed to get coordinates for row {!r}: {!r}'.format(index, exc))

    # Placeholder for rows without a result; overwritten below on success.
    dataFrame['latitude'] = 0.0
    dataFrame['longitude'] = 0.0

    for index, (lat, lng) in dict_coordinates.items():
        dataFrame.loc[index, 'latitude'] = lat
        dataFrame.loc[index, 'longitude'] = lng

    print('Done: Total: {} Success: {} Error {}'.format(total_count, current, errors))
print('Environment was initializied')

# Download the school register XML and keep a local copy for parsing.
url_schools = 'https://rejstriky.msmt.cz/opendata/vrejcz010.xml'
file_schools = 'schools.xml'
results = requests.get(url_schools)
# Stop here on an HTTP error instead of saving an error page as "XML".
results.raise_for_status()
# Save the raw bytes: decoding to text and re-encoding with the platform
# default encoding can corrupt non-ASCII characters or contradict the
# encoding declared inside the XML document.
with open(file_schools, 'wb') as file:
    file.write(results.content)
print('Loaded')

import xml.etree.ElementTree as et 
# Parse the downloaded register and flatten it into one row per school or
# educational facility: [id, name, type, capacity, address].
xtree = et.parse(file_schools)
xroot = xtree.getroot()

dic_scools = []
skipped = 0

for entry in xroot.findall('PravniSubjekt'):
    place_group = entry.find('SkolyZarizeni')
    if place_group is None:
        continue
    for place in place_group.findall('SkolaZarizeni'):
        # Only the first listed place of activity is used as the address.
        s_actual_add = place.find('SkolaMistaVykonuCinnosti/SkolaMistoVykonuCinnosti')
        if s_actual_add is None:
            # A record without an address cannot be geocoded later; skip it
            # rather than aborting the whole parse.
            skipped += 1
            continue
        address_parts = [
            s_actual_add.findtext(tag)
            for tag in ('MistoAdresa1', 'MistoAdresa2', 'MistoAdresa3')
        ]
        dic_scools.append([
            place.findtext('IZO'),
            place.findtext('SkolaPlnyNazev'),
            place.findtext('SkolaDruhTyp'),
            place.findtext('SkolaKapacita'),
            # Join only the parts that are present so a missing line does not
            # end up as the literal text "None" inside the address.
            ' '.join(part for part in address_parts if part),
        ])

print('Completed. Total schools and educational centers count: {}'.format(len(dic_scools)))
if skipped:
    print('Skipped records without an address: {}'.format(skipped))

columns = ['id', 'name', 'type', 'capacity', 'address']
df_education = pd.DataFrame(dic_scools, columns = columns)

print('Dataframe created: {},{}'.format(df_education.shape[0], df_education.shape[1]))

# Write the frame to a local CSV and push that file to the datastore under
# the same name.
# NOTE(review): this saves `df_prague` under a "schools" file name although the
# frame built just before is `df_education` — confirm the intended frame.
schools_csv = 'prague_schools.csv'
df_prague.to_csv(schools_csv)
upload_file(storage_creds, schools_csv, schools_csv)

# Check which type codes occur in the schools dataframe: print every distinct
# code together with the name of the first record that carries it.
types = df_education['type'].unique()
print('Types in XML file')
for type_code in types:
    sample_name = df_education.loc[df_education['type'] == type_code, 'name'].iloc[0]
    print(type_code, sample_name)

# Keep only the facility types of interest and label each row as either a
# school or an educational center.
# NOTE(review): the meaning of the individual type codes is not visible here —
# confirm against the register's code list.
types = ['B00', 'F10', 'C00','H22', 'G11']
# The first three codes are the ones labelled as schools.
types_shu = types[0:3]

# Work on an explicit copy so the assignments below modify this frame only
# and no chained-assignment warning has to be suppressed.
df_education_selected = df_education.loc[df_education['type'].isin(types)].copy()

# Set the default label on the new column only; filling missing values across
# the whole frame would also overwrite gaps in unrelated columns.
# NOTE(review): the label is misspelled ("educatioanal"); it is kept unchanged
# because later steps may match on this exact value.
df_education_selected['Type'] = 'educatioanal center'
df_education_selected.loc[df_education_selected['type'].isin(types_shu), 'Type'] = 'school'

print('Schools and educational centers count {}'.format(df_education_selected.shape[0]))
print('Unique types {}'.format(df_education_selected['Type'].unique()))

# Clean the selection and retrieve coordinates.
# The district name is taken from the last two words of the address,
# lower-cased.  Plain column assignment covers every row regardless of the
# index labels, unlike a label slice such as .loc[0:, ...].
df_education_selected['District_Name'] = df_education_selected['address'].apply(
    lambda address: ' '.join(address.split()[-2:]).lower()
)

# These columns are not needed once the type label and district are set.
columns_to_drop = ['id','name','capacity', 'type']
df_education_selected.drop(columns = columns_to_drop, inplace = True)

# Adds 'latitude' and 'longitude' columns in place, geocoding the address.
get_coordinates(df_education_selected, 'address')

# The address has served its purpose once the coordinates are stored.
df_education_selected.drop(columns = ['address'], inplace= True)
df_education_selected.head()

| Type | District_Name | latitude | longitude |
|---|---|---|---|
| school | praha 4 | 50.008620 | 14.448992 |
| school | praha 1 | 50.080344 | 14.415264 |

, . .


, , 1623 . GitHub Repository.



OpenStreetMap. , . , .


# Load the district and POI data produced by the previous steps.
population_file_name = files['districts']
poi_file_name = files['poi']

df_prague_population, selected_pois = get_data(population_file_name, poi_file_name)

# Report how much data the following steps will process.
poi_count = len(selected_pois)
district_count = len(df_prague_population)
print('Total POIs to explore: {}'.format(poi_count))
print('Total Districts to explore: {}'.format(district_count))

# Build the pedestrian network for the bounding box of all districts, prune
# poorly connected nodes, save it to HDF5 and upload the file.
start_time = time.time()
bbox = get_bounding_box(df_prague_population['Geometry'])
bbox_text = [str(coordinate) for coordinate in bbox]
bbox_string = '_'.join(bbox_text)
# The bounding box is part of the file name so each region gets its own file.
net_filename = 'network_{}.h5'.format(bbox_string)

print('Selected region bounding box is {}'.format(','.join(bbox_text)))

bbox_width = bbox[2] - bbox[0]
bbox_height = bbox[3] - bbox[1]
bbox_aspect_ratio = bbox_width / bbox_height

print("Build new network")
# NOTE(review): the bounding box values are passed in reverse order
# (3, 2, 1, 0) — confirm this matches the argument order the loader expects.
network = osm.pdna_network_from_bbox(bbox[3], bbox[2], bbox[1], bbox[0],network_type='walk')
print ('Remove low-connectivity nodes and save to h5')
low_connectivity = network.low_connectivity_nodes(impedance=1000, count=10, imp_name='distance')
lcn = low_connectivity
network.save_hdf5(net_filename, rm_nodes=lcn)
upload_file(storage_creds, net_filename, net_filename)

node_count = len(network.node_ids)
print('Network with {:,} nodes builded in {:,.2f} secs'.format(node_count, time.time()-start_time))

# Statistics from a sample run:
# Edge node pairs completed. Took 311.64 seconds
# Returning processed graph with 140,877 nodes and 204,649 edges...
# Completed OSM data download and Pandana node and edge table creation in 334.49 seconds
# Remove low-connectivity nodes and save to h5 File #network_14.224437012000067_49.94190007000003_14.706787572000053_50.17742967400005.h5 Uploaded
# Network with 140,877 nodes builded in 701.63 secs

140 822 204 575 . B 10 . GitHub. . 2 .


Data acquisition and cleaning



1,3 . — : 110 . 130 . . 70 . : , , , . — 90 100 . — , .



, . , , , , 160 180 1000 .


, , 1000 .



-10 . — 8 4.



. . -10 , .


, . . , , , . 1 , , , 1 -10. , 4 , /1000. , , , .



. 1000 , — .







, , . ( — , — , , , .)


. Pandana. . 140 822 5 . ( , , .)



. ( 3 .)


| id | 1_school | 2_school | 3_school | 1_educational center |
|---|---|---|---|---|
| 172508 | 218.384003 | 452.865997 | 502.253998 | 124.689003 |
| 172510 | 42.796001 | 326.665985 | 347.582001 | 50.898998 |
| 172512 | 226.128006 | 290.959991 | 300.862000 | 421.157990 |
| 172513 | 353.912994 | 393.170990 | 442.434998 | 627.351990 |
| 172514 | 270.234985 | 443.700989 | 492.393005 | 711.030029 |

k-. 1950- . , .


EM-, . k.


, , . , , .


, - . , , . .


, — Elbow. , 4 .


, , = /1300.



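| Cluster No | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| Walkability Score | 1.3 | 2.3 | 1.0 | 1.7 |
| Walking time (minutes) | | | | |
| Schools | 9 | 44 | 6 | 19 |
| Hobby | 36 | 45 | 20 | 44 |
| Library | 30 | 45 | 23 | 44 |
| Sport facilities | 8 | 43 | 6 | 14 |
| Playgrounds | 45 | 45 | 45 | 45 |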






4 .


№2 , . , 15 . , .




, №2 , , , .


— 0 — 1,3. , 10 . . — 30 .





10-16 . - . 4 .


. , , . , .



  1. Living Streets (The Pedestrians’ Association) A LIVING STREETS REPORT
  2. Criterion distances and correlates of active transportation to school in Belgian older adolescents. Delfien Van Dyck, Ilse De Bourdeaudhuij, Greet Cardon & Benedicte Deforche
  3. Naumann, S., & Kovalyov, M. Y. (2017). Pedestrian route search based on OpenStreetMap. In Intelligent Transport Systems and Travel Behaviour (pp. 87-96). Cham: Springer.
  4. Pandana
  5. THE MECHANICS OF WALKING IN CHILDREN
  6. OSMnx: Python for Street Networks
  7. : Wikipedia
  8. : Opendata Prague
  9. : Opendata Prague

All Articles