25 changed files with 78972 additions and 8 deletions
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,20 @@
@@ -0,0 +1,20 @@
|
||||
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)<a><br/> |
||||
<a href="avond.html">avond<a><br/> |
||||
<a href="bij (insect).html">bij (insect)<a><br/> |
||||
<a href="blad (aan een boom).html">blad (aan een boom)<a><br/> |
||||
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)<a><br/> |
||||
<a href="dag.html">dag<a><br/> |
||||
<a href="deurtje.html">deurtje<a><br/> |
||||
<a href="geel.html">geel<a><br/> |
||||
<a href="gegaan.html">gegaan<a><br/> |
||||
<a href="gezet.html">gezet<a><br/> |
||||
<a href="heel.html">heel<a><br/> |
||||
<a href="index.html">index<a><br/> |
||||
<a href="kaas.html">kaas<a><br/> |
||||
<a href="koken.html">koken<a><br/> |
||||
<a href="oog.html">oog<a><br/> |
||||
<a href="sprak (toe).html">sprak (toe)<a><br/> |
||||
<a href="tand.html">tand<a><br/> |
||||
<a href="trein.html">trein<a><br/> |
||||
<a href="vis.html">vis<a><br/> |
||||
<a href="zaterdag.html">zaterdag<a></body></html> |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,172 @@
@@ -0,0 +1,172 @@
|
||||
import folium |
||||
from jupyter_progressbar import ProgressBar |
||||
from pygeoif.geometry import mapping |
||||
from shapely.geometry.geo import shape, box |
||||
|
||||
from stimmen.cbs import data_file |
||||
from html import escape |
||||
import numpy as np |
||||
|
||||
from stimmen.latitude_longitude import reverse_latitude_longitude |
||||
|
||||
|
||||
def get_palette(n, no_black=True, no_white=True): |
||||
with open(data_file('data', 'glasbey', '{}_colors.txt'.format(n + no_black + no_white))) as f: |
||||
return [ |
||||
'#%02x%02x%02x' % tuple(int(c) for c in line.replace('\n', '').split(',')) |
||||
for line in f |
||||
if not no_black or line != '0,0,0\n' |
||||
if not no_white or line != '255,255,255\n' |
||||
] |
||||
|
||||
|
||||
def colored_name(name, color): |
||||
return '<span style=\\"color:{}; \\">{}</span>'.format(color, name) |
||||
|
||||
|
||||
def region_area_cdf(region_shape, resolution=10000): |
||||
xmin, ymin, xmax, ymax = region_shape.bounds |
||||
shape_area = region_shape.area |
||||
spaces = np.linspace(xmin, xmax, resolution + 1) |
||||
return np.array([ |
||||
box(xmin, ymin, xmax_, ymax).intersection(region_shape).area / shape_area |
||||
for xmax_ in spaces |
||||
]) |
||||
|
||||
|
||||
# Only slightly faster than region_area_cdf. |
||||
# def fast_sliced_shape_areas(region_shape, recursions=13): |
||||
# results = np.zeros(2 ** recursions) |
||||
# xmin, ymin, xmax, ymax = region_shape.bounds |
||||
# total = 0 |
||||
# |
||||
# def f(shape_, xmin, ymin, xmax, ymax, recursions, results_): |
||||
# nonlocal total |
||||
# shape_ = box(xmin, ymin, xmax, ymax).intersection(shape_) |
||||
# if recursions == 0: |
||||
# assert results_.shape == (1,) |
||||
# results_[0] = shape_.area |
||||
# total += shape_.area |
||||
# else: |
||||
# xmiddle = xmin + (xmax - xmin) / 2 |
||||
# middle_index = len(results_) // 2 |
||||
# f(shape_, xmin, ymin, xmiddle, ymax, recursions - 1, results_[:middle_index]) |
||||
# f(shape_, xmiddle, ymin, xmax, ymax, recursions - 1, results_[middle_index:]) |
||||
# |
||||
# f(region_shape, xmin, ymin, xmax, ymax, recursions, results) |
||||
# return results / results.sum() * region_shape.area |
||||
|
||||
|
||||
def area_adjust_boundaries(region_shape, boundaries, region_cdf_cache=None, resolution=10000): |
||||
"""Adjust the boundaries from percentage of the width of a shape, to percentage of the area of a shape""" |
||||
if region_cdf_cache is None: |
||||
region_cdf_cache = region_area_cdf(region_shape, resolution) |
||||
elif not isinstance(region_cdf_cache, np.ndarray): |
||||
region_cdf_cache = np.array(region_cdf_cache) |
||||
return width_adjust_boundaries( |
||||
region_shape, |
||||
np.abs(region_cdf_cache[None, :] - boundaries[:, None]).argmin(axis=1) / resolution |
||||
) |
||||
|
||||
|
||||
def width_adjust_boundaries(region_shape, boundaries): |
||||
xmin, _, xmax, _ = region_shape.bounds |
||||
return boundaries * (xmax - xmin) + xmin |
||||
|
||||
|
||||
def pronunciation_bars( |
||||
regions, dataframe, |
||||
region_name_property, region_name_column, |
||||
group_column='answer_text', |
||||
cutoff_percentage=0.05, |
||||
normalize_area=True, |
||||
progress_bar=False, |
||||
): |
||||
# all values of group_column that appear at least cutoff_percentage in one of the regions |
||||
relevant_groups = { |
||||
group |
||||
for region_name, region_rows in dataframe.groupby(region_name_column) |
||||
for group, aggregation in region_rows.groupby( |
||||
group_column).agg({group_column: len}).iterrows() |
||||
if aggregation[group_column] >= cutoff_percentage * len(region_rows) |
||||
} |
||||
|
||||
group_to_color = dict(zip(relevant_groups, get_palette(len(relevant_groups)))) |
||||
group_to_color['other'] = '#ccc' |
||||
|
||||
n_other = len(dataframe) - sum( |
||||
sum(dataframe[group_column] == group_value) |
||||
for group_value in relevant_groups |
||||
) |
||||
|
||||
# Each FeatureGroup represents all polygons (one for each region) of the relevant_groups |
||||
feature_groups = { |
||||
group_value: folium.FeatureGroup( |
||||
name=colored_name( |
||||
'{value} ({amount})'.format(value=escape(group_value), amount=amount), |
||||
color |
||||
), |
||||
overlay=True |
||||
) |
||||
for group_value, color in group_to_color.items() |
||||
for amount in [ |
||||
sum(dataframe[group_column] == group_value) |
||||
if group_value != 'other' else |
||||
n_other |
||||
] # alias |
||||
} |
||||
|
||||
progress_bar = ProgressBar if progress_bar else lambda x: x |
||||
|
||||
# for each region, create the bar-polygons. |
||||
for feature in progress_bar(regions['features']): |
||||
region_name = feature['properties'][region_name_property] |
||||
region_rows = dataframe[dataframe[region_name_column] == region_name] |
||||
region_shape = shape(feature['geometry']) |
||||
_, ymin, _, ymax = region_shape.bounds |
||||
|
||||
group_values_occurrence = { |
||||
group_value: aggregation[group_column] |
||||
for group_value, aggregation in region_rows.groupby(group_column).agg({group_column: len}).iterrows() |
||||
if group_value in relevant_groups |
||||
} |
||||
group_values_occurrence['other'] = len(region_rows) - sum(group_values_occurrence.values()) |
||||
group_values, group_occurrences = zip(*sorted( |
||||
group_values_occurrence.items(), |
||||
key=lambda x: (x[0] == 'other', -x[1]) |
||||
)) |
||||
|
||||
group_percentages = np.array(group_occurrences) / len(region_rows) |
||||
group_boundaries = np.cumsum((0,) + group_occurrences) / len(region_rows) |
||||
if normalize_area: |
||||
if '__region_shape_cdf_cache' not in feature['properties']: |
||||
feature['properties']['__region_shape_cdf_cache'] = region_area_cdf(region_shape).tolist() |
||||
group_boundaries = area_adjust_boundaries( |
||||
region_shape, group_boundaries, |
||||
region_cdf_cache=feature['properties']['__region_shape_cdf_cache'] |
||||
) |
||||
else: |
||||
group_boundaries = width_adjust_boundaries(region_shape, group_boundaries) |
||||
|
||||
for group_value, percentage, count, left_boundary, right_boundary in zip( |
||||
group_values, |
||||
group_percentages, |
||||
group_occurrences, |
||||
group_boundaries[:-1], group_boundaries[1:] |
||||
): |
||||
if count == 0 or left_boundary == right_boundary: |
||||
continue |
||||
|
||||
bar_shape = region_shape.intersection(box(left_boundary, ymin, right_boundary, ymax)) |
||||
if bar_shape.area == 0: |
||||
continue |
||||
polygon = folium.Polygon( |
||||
reverse_latitude_longitude(mapping(bar_shape)['coordinates']), |
||||
fill_color=group_to_color[group_value], |
||||
fill_opacity=0.8, |
||||
color=None, |
||||
popup='{} ({}, {: 3d}%)'.format(group_value, count, int(round(100 * percentage))) |
||||
) |
||||
polygon.add_to(feature_groups[group_value]) |
||||
|
||||
return feature_groups |
Loading…
Reference in new issue