Browse Source

cleaned up bar maps

master
H.T. Kruitbosch 4 years ago
parent
commit
7da2bfc400
  1. 7
      Readme.md
  2. 5637
      maps/bar-maps/armen (lichaamsdeel).html
  3. 4463
      maps/bar-maps/avond.html
  4. 4229
      maps/bar-maps/bij (insect).html
  5. 3008
      maps/bar-maps/blad (aan een boom).html
  6. 4381
      maps/bar-maps/borst (lichaamsdeel).html
  7. 3008
      maps/bar-maps/dag.html
  8. 4896
      maps/bar-maps/deurtje.html
  9. 3597
      maps/bar-maps/geel.html
  10. 5286
      maps/bar-maps/gegaan.html
  11. 4615
      maps/bar-maps/gezet.html
  12. 3437
      maps/bar-maps/heel.html
  13. 20
      maps/bar-maps/index.html
  14. 3874
      maps/bar-maps/kaas.html
  15. 4775
      maps/bar-maps/koken.html
  16. 4381
      maps/bar-maps/oog.html
  17. 4381
      maps/bar-maps/sprak (toe).html
  18. 3987
      maps/bar-maps/tand.html
  19. 3753
      maps/bar-maps/trein.html
  20. 1947
      maps/bar-maps/vis.html
  21. 4658
      maps/bar-maps/zaterdag.html
  22. 409
      notebooks/Bar Maps per word for Pronounciation Occurence in Frysian Municipalities.ipynb
  23. 20
      qgis/stimmen.qgs.qto3settings
  24. 172
      stimmen/folium.py
  25. 39
      stimmen/geojson.py

7
Readme.md

@ -74,3 +74,10 @@ This is a simple example for the created gabmap files. @@ -74,3 +74,10 @@ This is a simple example for the created gabmap files.
* [geojson](data/Gabmap_example.geojson)
* [percentages](data/Pronunciation_percentages_example.gabmap.tsv)
* [pronunciation](data/Pronunciations_example.gabmap.tsv)
### Bar Maps per word for Pronunciation Occurrence in Frisian Municipalities
For each word, a map illustrates the pronunciation occurrence as measured by the prediction quiz, per Frisian
municipality.
[notebook](notebooks/Bar%20Maps%20per%20word%20for%20Pronounciation%20Occurence%20in%20Frysian%20Municipalities.ipynb)

5637
maps/bar-maps/armen (lichaamsdeel).html

File diff suppressed because one or more lines are too long

4463
maps/bar-maps/avond.html

File diff suppressed because one or more lines are too long

4229
maps/bar-maps/bij (insect).html

File diff suppressed because one or more lines are too long

3008
maps/bar-maps/blad (aan een boom).html

File diff suppressed because one or more lines are too long

4381
maps/bar-maps/borst (lichaamsdeel).html

File diff suppressed because one or more lines are too long

3008
maps/bar-maps/dag.html

File diff suppressed because one or more lines are too long

4896
maps/bar-maps/deurtje.html

File diff suppressed because one or more lines are too long

3597
maps/bar-maps/geel.html

File diff suppressed because one or more lines are too long

5286
maps/bar-maps/gegaan.html

File diff suppressed because one or more lines are too long

4615
maps/bar-maps/gezet.html

File diff suppressed because one or more lines are too long

3437
maps/bar-maps/heel.html

File diff suppressed because one or more lines are too long

20
maps/bar-maps/index.html

@ -0,0 +1,20 @@ @@ -0,0 +1,20 @@
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)</a><br/>
<a href="avond.html">avond</a><br/>
<a href="bij (insect).html">bij (insect)</a><br/>
<a href="blad (aan een boom).html">blad (aan een boom)</a><br/>
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)</a><br/>
<a href="dag.html">dag</a><br/>
<a href="deurtje.html">deurtje</a><br/>
<a href="geel.html">geel</a><br/>
<a href="gegaan.html">gegaan</a><br/>
<a href="gezet.html">gezet</a><br/>
<a href="heel.html">heel</a><br/>
<a href="index.html">index</a><br/>
<a href="kaas.html">kaas</a><br/>
<a href="koken.html">koken</a><br/>
<a href="oog.html">oog</a><br/>
<a href="sprak (toe).html">sprak (toe)</a><br/>
<a href="tand.html">tand</a><br/>
<a href="trein.html">trein</a><br/>
<a href="vis.html">vis</a><br/>
<a href="zaterdag.html">zaterdag</a></body></html>

3874
maps/bar-maps/kaas.html

File diff suppressed because one or more lines are too long

4775
maps/bar-maps/koken.html

File diff suppressed because one or more lines are too long

4381
maps/bar-maps/oog.html

File diff suppressed because one or more lines are too long

4381
maps/bar-maps/sprak (toe).html

File diff suppressed because one or more lines are too long

3987
maps/bar-maps/tand.html

File diff suppressed because one or more lines are too long

3753
maps/bar-maps/trein.html

File diff suppressed because one or more lines are too long

1947
maps/bar-maps/vis.html

File diff suppressed because one or more lines are too long

4658
maps/bar-maps/zaterdag.html

File diff suppressed because one or more lines are too long

409
notebooks/Bar Maps per word for Pronounciation Occurence in Frysian Municipalities.ipynb

File diff suppressed because one or more lines are too long

20
qgis/stimmen.qgs.qto3settings

@ -35,15 +35,15 @@ @@ -35,15 +35,15 @@
"comboBox_Label": null,
"comboBox_ObjectType": 1,
"heightWidget": {
"comboData": 1,
"comboText": "Absolute value",
"comboData": 101,
"comboText": " \"distance 1\"",
"editText": "0",
"type": 4
},
"labelHeightWidget": {
"comboData": 2,
"comboText": "Height from point",
"editText": "7309.11282486",
"editText": "7500",
"type": 6
},
"radioButton_IntersectingFeatures": true,
@ -62,12 +62,12 @@ @@ -62,12 +62,12 @@
"styleWidget2": {
"comboData": 1,
"comboText": "Fixed value",
"editText": "1000",
"editText": "300",
"type": 1
},
"styleWidget3": {
"comboData": 103,
"comboText": "\"distance1\"",
"comboData": 101,
"comboText": "\"distance 1\"",
"editText": "300",
"type": 1
},
@ -77,5 +77,11 @@ @@ -77,5 +77,11 @@
}
},
"PluginVersion": "1.4.2",
"Template": "3DViewer.html"
"Template": "3DViewer.html",
"WORLD": {
"lineEdit_BaseSize": "100",
"lineEdit_Color": "",
"lineEdit_zFactor": "1.5",
"lineEdit_zShift": "0"
}
}

172
stimmen/folium.py

@ -0,0 +1,172 @@ @@ -0,0 +1,172 @@
import folium
from jupyter_progressbar import ProgressBar
from pygeoif.geometry import mapping
from shapely.geometry.geo import shape, box
from stimmen.cbs import data_file
from html import escape
import numpy as np
from stimmen.latitude_longitude import reverse_latitude_longitude
def get_palette(n, no_black=True, no_white=True):
    """Read a Glasbey colour palette file and return its colours as hex strings.

    The file ``data/glasbey/<N>_colors.txt`` is opened via ``data_file``, where
    ``N`` is ``n`` plus one for each of ``no_black`` / ``no_white`` that is set.
    Each non-empty line is parsed as comma-separated integer ``r,g,b`` values.
    NOTE(review): presumably the ``<N>_colors.txt`` file lists N colours that
    include black and white, so that ``n`` colours remain after filtering --
    confirm against the data files.

    :param n: number of colours wanted after black/white have been removed.
    :param no_black: drop the colour ``0,0,0`` from the result.
    :param no_white: drop the colour ``255,255,255`` from the result.
    :return: list of ``'#rrggbb'`` strings in file order.
    """
    excluded = set()
    if no_black:
        excluded.add((0, 0, 0))
    if no_white:
        excluded.add((255, 255, 255))

    n_colors = n + int(bool(no_black)) + int(bool(no_white))
    palette = []
    with open(data_file('data', 'glasbey', '{}_colors.txt'.format(n_colors))) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing empty line.
                continue
            # Compare parsed values rather than raw text, so the filter does not
            # depend on line endings or on spaces after the commas.
            rgb = tuple(int(component) for component in line.split(','))
            if rgb in excluded:
                continue
            palette.append('#%02x%02x%02x' % rgb)
    return palette
def colored_name(name, color):
    """Wrap ``name`` in a ``<span>`` whose inline style sets the text colour.

    The double quotes around the style attribute are emitted with a literal
    backslash in front of them.
    NOTE(review): presumably because the result is embedded in a quoted
    JavaScript string by the caller -- confirm.
    """
    template = '<span style=\\"color:{}; \\">{}</span>'
    return template.format(color, name)
def region_area_cdf(region_shape, resolution=10000):
    """Sample the cumulative area of ``region_shape`` from left to right.

    The bounding box is cut at ``resolution + 1`` evenly spaced x positions
    (both edges included). For each cut the area of the shape lying left of
    the cut is divided by the total area of the shape.

    :param region_shape: geometry providing ``bounds``, ``area`` and being
        usable as argument to ``intersection``.
    :param resolution: number of intervals the width is divided into.
    :return: numpy array of ``resolution + 1`` area fractions.
    """
    left, bottom, right, top = region_shape.bounds
    total_area = region_shape.area

    fractions = []
    for cut in np.linspace(left, right, resolution + 1):
        # Everything of the bounding box to the left of the cut.
        strip = box(left, bottom, cut, top)
        fractions.append(strip.intersection(region_shape).area / total_area)
    return np.array(fractions)
# Only slightly faster than region_area_cdf.
# def fast_sliced_shape_areas(region_shape, recursions=13):
# results = np.zeros(2 ** recursions)
# xmin, ymin, xmax, ymax = region_shape.bounds
# total = 0
#
# def f(shape_, xmin, ymin, xmax, ymax, recursions, results_):
# nonlocal total
# shape_ = box(xmin, ymin, xmax, ymax).intersection(shape_)
# if recursions == 0:
# assert results_.shape == (1,)
# results_[0] = shape_.area
# total += shape_.area
# else:
# xmiddle = xmin + (xmax - xmin) / 2
# middle_index = len(results_) // 2
# f(shape_, xmin, ymin, xmiddle, ymax, recursions - 1, results_[:middle_index])
# f(shape_, xmiddle, ymin, xmax, ymax, recursions - 1, results_[middle_index:])
#
# f(region_shape, xmin, ymin, xmax, ymax, recursions, results)
# return results / results.sum() * region_shape.area
def area_adjust_boundaries(region_shape, boundaries, region_cdf_cache=None, resolution=10000):
    """Convert boundaries given as fractions of a shape's area into x coordinates.

    For every boundary the sample of the area CDF closest to it is looked up;
    the position of that sample is the fraction of the shape's width at which
    the requested share of the area lies to the left. That width fraction is
    then turned into an x coordinate by ``width_adjust_boundaries``.

    :param region_shape: geometry of the region.
    :param boundaries: 1-d sequence of area fractions.
    :param region_cdf_cache: optional precomputed result of ``region_area_cdf``
        (array or list); computed with ``resolution`` when omitted.
    :param resolution: number of intervals used when the CDF has to be
        computed here. A supplied cache carries its own resolution.
    :return: array of x coordinates, one per boundary.
    """
    if region_cdf_cache is None:
        region_cdf_cache = region_area_cdf(region_shape, resolution)
    region_cdf = np.asarray(region_cdf_cache)
    boundaries = np.asarray(boundaries)

    # The CDF has one sample more than it has intervals. Deriving the interval
    # count from the CDF itself keeps the result correct when the cache was
    # sampled at a different resolution than the `resolution` argument.
    n_intervals = max(len(region_cdf) - 1, 1)

    nearest_sample = np.abs(region_cdf[None, :] - boundaries[:, None]).argmin(axis=1)
    return width_adjust_boundaries(region_shape, nearest_sample / n_intervals)
def width_adjust_boundaries(region_shape, boundaries):
    """Map fractions of a shape's width onto x coordinates.

    ``0`` maps onto the left edge of the bounding box of ``region_shape`` and
    ``1`` onto its right edge.

    :param region_shape: object whose ``bounds`` is ``(xmin, ymin, xmax, ymax)``.
    :param boundaries: width fraction(s); a number or a numpy array.
    :return: the corresponding x coordinate(s).
    """
    left, _, right, _ = region_shape.bounds
    width = right - left
    scaled = boundaries * width
    return scaled + left
def pronunciation_bars(
    regions, dataframe,
    region_name_property, region_name_column,
    group_column='answer_text',
    cutoff_percentage=0.05,
    normalize_area=True,
    progress_bar=False,
):
    """Build folium layers that fill every region with bars, one bar per group.

    Every region is cut into vertical slices, one for each value of
    ``group_column`` occurring in that region. The slices are ordered from the
    most to the least frequent value, with the collective ``'other'`` slice
    last. Values that never reach ``cutoff_percentage`` of the rows of any
    region are counted under ``'other'``.

    Side effect: with ``normalize_area`` the sampled area CDF of each drawn
    region is stored in ``feature['properties']['__region_shape_cdf_cache']``
    of the passed-in ``regions`` so it is only computed once per feature.

    :param regions: geojson-like dict with a ``'features'`` list.
    :param dataframe: one row per answer, with at least ``region_name_column``
        and ``group_column``.
    :param region_name_property: feature property holding the region name.
    :param region_name_column: dataframe column holding the region name.
    :param group_column: dataframe column with the value to draw bars for;
        the values are passed to ``html.escape`` and thus have to be strings.
    :param cutoff_percentage: minimal fraction of the rows of a region a value
        needs in at least one region to get its own colour.
    :param normalize_area: make the area (instead of the width) of a bar
        proportional to the share of the value.
    :param progress_bar: wrap the iteration over the features in ``ProgressBar``.
    :return: dict mapping each relevant value and ``'other'`` to a
        ``folium.FeatureGroup`` containing its polygons.
    """
    # All values of group_column that make up at least cutoff_percentage of
    # the rows of at least one region.
    relevant_groups = {
        group
        for _, region_rows in dataframe.groupby(region_name_column)
        for group, count in region_rows.groupby(group_column).size().items()
        if count >= cutoff_percentage * len(region_rows)
    }

    group_to_color = dict(zip(relevant_groups, get_palette(len(relevant_groups))))
    group_to_color['other'] = '#ccc'

    # Occurrences over the whole dataframe, counted once for all groups.
    total_counts = dataframe[group_column].value_counts().to_dict()
    n_other = len(dataframe) - sum(
        total_counts.get(group_value, 0) for group_value in relevant_groups
    )

    # Each FeatureGroup collects the polygons (one per region) of one group.
    feature_groups = {}
    for group_value, color in group_to_color.items():
        if group_value == 'other':
            amount = n_other
        else:
            amount = total_counts.get(group_value, 0)
        feature_groups[group_value] = folium.FeatureGroup(
            name=colored_name(
                '{value} ({amount})'.format(value=escape(group_value), amount=amount),
                color
            ),
            overlay=True
        )

    wrap = ProgressBar if progress_bar else (lambda features: features)

    # For each region, create the bar polygons.
    for feature in wrap(regions['features']):
        region_name = feature['properties'][region_name_property]
        region_rows = dataframe[dataframe[region_name_column] == region_name]
        n_rows = len(region_rows)
        if n_rows == 0:
            # Nothing to draw; the shares below would also divide by zero.
            continue

        region_shape = shape(feature['geometry'])
        _, ymin, _, ymax = region_shape.bounds

        group_values_occurrence = {
            group_value: int(count)
            for group_value, count in region_rows.groupby(group_column).size().items()
            if group_value in relevant_groups
        }
        group_values_occurrence['other'] = n_rows - sum(group_values_occurrence.values())

        # Most frequent first, 'other' always last.
        group_values, group_occurrences = zip(*sorted(
            group_values_occurrence.items(),
            key=lambda item: (item[0] == 'other', -item[1])
        ))
        group_percentages = np.array(group_occurrences) / n_rows
        # Cumulative shares: the left and right edge of every bar as a fraction.
        group_boundaries = np.cumsum((0,) + group_occurrences) / n_rows

        if normalize_area:
            properties = feature['properties']
            if '__region_shape_cdf_cache' not in properties:
                properties['__region_shape_cdf_cache'] = region_area_cdf(region_shape).tolist()
            group_boundaries = area_adjust_boundaries(
                region_shape, group_boundaries,
                region_cdf_cache=properties['__region_shape_cdf_cache']
            )
        else:
            group_boundaries = width_adjust_boundaries(region_shape, group_boundaries)

        for group_value, percentage, count, left_boundary, right_boundary in zip(
            group_values,
            group_percentages,
            group_occurrences,
            group_boundaries[:-1], group_boundaries[1:]
        ):
            if count == 0 or left_boundary == right_boundary:
                continue
            bar_shape = region_shape.intersection(box(left_boundary, ymin, right_boundary, ymax))
            if bar_shape.area == 0:
                continue
            polygon = folium.Polygon(
                reverse_latitude_longitude(mapping(bar_shape)['coordinates']),
                fill_color=group_to_color[group_value],
                fill_opacity=0.8,
                color=None,
                # The popup text is rendered as HTML, so the value is escaped
                # just like it is for the layer name above.
                popup='{} ({}, {: 3d}%)'.format(
                    escape(group_value), count, int(round(100 * percentage))
                )
            )
            polygon.add_to(feature_groups[group_value])
    return feature_groups

39
stimmen/geojson.py

@ -1,5 +1,6 @@ @@ -1,5 +1,6 @@
from pygeoif.geometry import mapping
from shapely.geometry import shape
from shapely.geometry.point import Point
def merge_features(geojson, condition, aggregate={}):
@ -40,4 +41,40 @@ def merge_features(geojson, condition, aggregate={}): @@ -40,4 +41,40 @@ def merge_features(geojson, condition, aggregate={}):
'geometry': mapping(union),
'properties': properties
})
return geojson
return geojson
def inject_geojson_regions_into_dataframe(
    geojson, dataframe,
    latitude_column='latitude', longitude_column='longitude',
    region_name_property='name',
    region_name_column='region'
):
    """Add a column to the dataframe naming the geojson region each row lies in.

    For every row the first feature of ``geojson`` whose geometry contains the
    point ``(longitude, latitude)`` of the row is looked up, and the value of
    its ``region_name_property`` is stored in ``region_name_column``. Rows that
    lie in none of the regions, or that have a NaN coordinate, get ``None``.

    Storing the region name once allows for faster cross referencing between
    the geojson and the dataframe than repeating the shape-point containment
    check. Operates in place and also returns the dataframe.

    :param geojson: geojson-like dict with a ``'features'`` list.
    :param dataframe: dataframe with a latitude and a longitude column.
    :param latitude_column: name of the column holding the latitude.
    :param longitude_column: name of the column holding the longitude.
    :param region_name_property: feature property holding the region name.
    :param region_name_column: name of the column to write.
    :return: the same ``dataframe`` object.
    """
    shapes = {
        feature['properties'][region_name_property]: shape(feature['geometry'])
        for feature in geojson['features']
    }

    def get_region_name(latitude, longitude):
        # NaN is the only value that differs from itself.
        if latitude != latitude or longitude != longitude:
            return None
        point = Point(longitude, latitude)
        for region_name, region_shape in shapes.items():
            if region_shape.contains(point):
                return region_name
        return None

    coordinates = list(zip(dataframe[latitude_column], dataframe[longitude_column]))

    # Resolve every distinct coordinate only once.
    point_to_region_name = {
        coordinate: get_region_name(*coordinate)
        for coordinate in set(coordinates)
    }

    # `.get` instead of indexing: a coordinate pair containing NaN never
    # compares equal to another pair and must not raise a KeyError.
    dataframe[region_name_column] = [
        point_to_region_name.get(coordinate)
        for coordinate in coordinates
    ]
    return dataframe

Loading…
Cancel
Save