102 changed files with 2187 additions and 1472453 deletions
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,20 +1,22 @@
@@ -1,20 +1,22 @@
|
||||
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)<a><br/> |
||||
<a href="avond.html">avond<a><br/> |
||||
<a href="bij (insect).html">bij (insect)<a><br/> |
||||
<a href="blad (aan een boom).html">blad (aan een boom)<a><br/> |
||||
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)<a><br/> |
||||
<a href="dag.html">dag<a><br/> |
||||
<a href="deurtje.html">deurtje<a><br/> |
||||
<a href="geel.html">geel<a><br/> |
||||
<a href="gegaan.html">gegaan<a><br/> |
||||
<a href="gezet.html">gezet<a><br/> |
||||
<a href="heel.html">heel<a><br/> |
||||
<html><head></head><body> <a href="gemeentes_avond.html">gemeentes avond<a><br/> |
||||
<a href="index.html">index<a><br/> |
||||
<a href="kaas.html">kaas<a><br/> |
||||
<a href="koken.html">koken<a><br/> |
||||
<a href="oog.html">oog<a><br/> |
||||
<a href="sprak (toe).html">sprak (toe)<a><br/> |
||||
<a href="tand.html">tand<a><br/> |
||||
<a href="trein.html">trein<a><br/> |
||||
<a href="vis.html">vis<a><br/> |
||||
<a href="zaterdag.html">zaterdag<a></body></html> |
||||
<a href="neighborhood_armen (lichaamsdeel).html">neighborhood armen (lichaamsdeel)<a><br/> |
||||
<a href="neighborhood_avond.html">neighborhood avond<a><br/> |
||||
<a href="neighborhood_bij (insect).html">neighborhood bij (insect)<a><br/> |
||||
<a href="neighborhood_blad (aan een boom).html">neighborhood blad (aan een boom)<a><br/> |
||||
<a href="neighborhood_borst (lichaamsdeel).html">neighborhood borst (lichaamsdeel)<a><br/> |
||||
<a href="neighborhood_dag.html">neighborhood dag<a><br/> |
||||
<a href="neighborhood_deurtje.html">neighborhood deurtje<a><br/> |
||||
<a href="neighborhood_geel.html">neighborhood geel<a><br/> |
||||
<a href="neighborhood_gegaan.html">neighborhood gegaan<a><br/> |
||||
<a href="neighborhood_gezet.html">neighborhood gezet<a><br/> |
||||
<a href="neighborhood_heel.html">neighborhood heel<a><br/> |
||||
<a href="neighborhood_kaas.html">neighborhood kaas<a><br/> |
||||
<a href="neighborhood_koken.html">neighborhood koken<a><br/> |
||||
<a href="neighborhood_oog.html">neighborhood oog<a><br/> |
||||
<a href="neighborhood_sprak (toe).html">neighborhood sprak (toe)<a><br/> |
||||
<a href="neighborhood_tand.html">neighborhood tand<a><br/> |
||||
<a href="neighborhood_trein.html">neighborhood trein<a><br/> |
||||
<a href="neighborhood_vis.html">neighborhood vis<a><br/> |
||||
<a href="neighborhood_zaterdag.html">neighborhood zaterdag<a><br/> |
||||
<a href="wijken_avond.html">wijken avond<a></body></html> |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,20 +0,0 @@
@@ -1,20 +0,0 @@
|
||||
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)<a><br/> |
||||
<a href="avond.html">avond<a><br/> |
||||
<a href="bij (insect).html">bij (insect)<a><br/> |
||||
<a href="blad (aan een boom).html">blad (aan een boom)<a><br/> |
||||
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)<a><br/> |
||||
<a href="dag.html">dag<a><br/> |
||||
<a href="deurtje.html">deurtje<a><br/> |
||||
<a href="geel.html">geel<a><br/> |
||||
<a href="gegaan.html">gegaan<a><br/> |
||||
<a href="gezet.html">gezet<a><br/> |
||||
<a href="heel.html">heel<a><br/> |
||||
<a href="index.html">index<a><br/> |
||||
<a href="kaas.html">kaas<a><br/> |
||||
<a href="koken.html">koken<a><br/> |
||||
<a href="oog.html">oog<a><br/> |
||||
<a href="sprak (toe).html">sprak (toe)<a><br/> |
||||
<a href="tand.html">tand<a><br/> |
||||
<a href="trein.html">trein<a><br/> |
||||
<a href="vis.html">vis<a><br/> |
||||
<a href="zaterdag.html">zaterdag<a></body></html> |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,20 +0,0 @@
@@ -1,20 +0,0 @@
|
||||
<html><head></head><body> <a href="armen (lichaamsdeel).html">armen (lichaamsdeel)<a><br/> |
||||
<a href="avond.html">avond<a><br/> |
||||
<a href="bij (insect).html">bij (insect)<a><br/> |
||||
<a href="blad (aan een boom).html">blad (aan een boom)<a><br/> |
||||
<a href="borst (lichaamsdeel).html">borst (lichaamsdeel)<a><br/> |
||||
<a href="dag.html">dag<a><br/> |
||||
<a href="deurtje.html">deurtje<a><br/> |
||||
<a href="geel.html">geel<a><br/> |
||||
<a href="gegaan.html">gegaan<a><br/> |
||||
<a href="gezet.html">gezet<a><br/> |
||||
<a href="heel.html">heel<a><br/> |
||||
<a href="index.html">index<a><br/> |
||||
<a href="kaas.html">kaas<a><br/> |
||||
<a href="koken.html">koken<a><br/> |
||||
<a href="oog.html">oog<a><br/> |
||||
<a href="sprak (toe).html">sprak (toe)<a><br/> |
||||
<a href="tand.html">tand<a><br/> |
||||
<a href="trein.html">trein<a><br/> |
||||
<a href="vis.html">vis<a><br/> |
||||
<a href="zaterdag.html">zaterdag<a></body></html> |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,20 +0,0 @@
@@ -1,20 +0,0 @@
|
||||
<html><head></head><body> <a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/armen (lichaamsdeel).html">armen (lichaamsdeel)<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/avond.html">avond<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/bij (insect).html">bij (insect)<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/blad (aan een boom).html">blad (aan een boom)<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/borst (lichaamsdeel).html">borst (lichaamsdeel)<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/dag.html">dag<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/deurtje.html">deurtje<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/geel.html">geel<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/gegaan.html">gegaan<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/gezet.html">gezet<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/heel.html">heel<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/index.html">index<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/kaas.html">kaas<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/koken.html">koken<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/oog.html">oog<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/sprak (toe).html">sprak (toe)<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/tand.html">tand<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/trein.html">trein<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/vis.html">vis<a><br/> |
||||
<a href="http://herbertkruitbosch.com/pronunciation_maps/heatmaps/zaterdag.html">zaterdag<a></body></html> |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,83 +0,0 @@
@@ -1,83 +0,0 @@
|
||||
{ |
||||
"cells": [ |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"# Gabmap format\n", |
||||
"\n", |
||||
"Exploration of the format of the lines in example Gabmap files Martijn had sent." |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": null, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"with open('../data/martijn_format/Dutch613-coordinates.txt') as f:\n", |
||||
" coordinates = list(f)\n", |
||||
" \n", |
||||
"with open('../data/martijn_format/Nederlands-ipa.utxt') as f:\n", |
||||
" table = list(f)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": null, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"coordinates[0].split('\\t')" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": null, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"coordinates[1].split('\\t')" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": null, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"table[0].split('\\t')" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": null, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"table[1].split('\\t')" |
||||
] |
||||
} |
||||
], |
||||
"metadata": { |
||||
"kernelspec": { |
||||
"display_name": "Python 3", |
||||
"language": "python", |
||||
"name": "python3" |
||||
}, |
||||
"language_info": { |
||||
"codemirror_mode": { |
||||
"name": "ipython", |
||||
"version": 3 |
||||
}, |
||||
"file_extension": ".py", |
||||
"mimetype": "text/x-python", |
||||
"name": "python", |
||||
"nbconvert_exporter": "python", |
||||
"pygments_lexer": "ipython3", |
||||
"version": "3.6.5" |
||||
} |
||||
}, |
||||
"nbformat": 4, |
||||
"nbformat_minor": 2 |
||||
} |
@ -1,458 +0,0 @@
@@ -1,458 +0,0 @@
|
||||
{ |
||||
"cells": [ |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"# Geographical pronunciation tables, simple example\n", |
||||
"\n", |
||||
"Simple example to create gabmap files for two words with few pronunciations and two regions." |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 1, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"import sys\n", |
||||
"sys.path.append('..')\n", |
||||
"\n", |
||||
"import pandas\n", |
||||
"import MySQLdb\n", |
||||
"import json\n", |
||||
"import copy\n", |
||||
"\n", |
||||
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n", |
||||
"\n", |
||||
"from shapely.geometry import shape, Point\n", |
||||
"\n", |
||||
"from gabmap import create_gabmap_dataframes\n", |
||||
"\n", |
||||
"from stimmen.geojson import merge_features" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 2, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"with open('../data/Friesland_wijken.geojson') as f:\n", |
||||
" regions = json.load(f)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"## Load and simplify" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 3, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"# Answers to how participants state a word should be pronounced\n", |
||||
"\n", |
||||
"answers = pandas.read_sql('''\n", |
||||
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n", |
||||
"FROM core_surveyresult as survey\n", |
||||
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n", |
||||
"INNER JOIN core_predictionquizresultquestionanswer as answer\n", |
||||
" ON result.id = answer.prediction_quiz_id\n", |
||||
"''', db)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 4, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"regions_simple = merge_features(copy.deepcopy(regions),\n", |
||||
" condition=lambda feature: feature['properties']['GM_NAAM'] == 'Heerenveen',\n", |
||||
")\n", |
||||
"\n", |
||||
"regions_simple = merge_features(\n", |
||||
" regions_simple,\n", |
||||
" condition=lambda feature: feature['properties']['GM_NAAM'] == 'Leeuwarden',\n", |
||||
")\n", |
||||
"regions_simple['features'] = regions_simple['features'][-2:]\n", |
||||
"\n", |
||||
"regions_simple['features'][0]['properties']['name'] = 'Heerenveen'\n", |
||||
"regions_simple['features'][1]['properties']['name'] = 'Leeuwarden'" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 5, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"answers_simple = answers[\n", |
||||
" (answers['question_text'] == '\"blad\" (aan een boom)') |\n", |
||||
" (answers['question_text'] == '\"vis\"')\n", |
||||
"].copy()\n", |
||||
"\n", |
||||
"answers_simple['question_text'] = answers_simple['question_text'].map(\n", |
||||
" lambda x: x.replace('\"', '').replace('*', ''))\n", |
||||
"\n", |
||||
"answers_simple['answer_text'] = answers_simple['answer_text'].map(\n", |
||||
" lambda x: x[x.find('('):x.find(')')][1:])" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"Two words, blad (aan een boom) and vis, with 4 and 2 pronunciations respectively" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 6, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"text/html": [ |
||||
"<div>\n", |
||||
"<style scoped>\n", |
||||
" .dataframe tbody tr th:only-of-type {\n", |
||||
" vertical-align: middle;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe tbody tr th {\n", |
||||
" vertical-align: top;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe thead th {\n", |
||||
" text-align: right;\n", |
||||
" }\n", |
||||
"</style>\n", |
||||
"<table border=\"1\" class=\"dataframe\">\n", |
||||
" <thead>\n", |
||||
" <tr style=\"text-align: right;\">\n", |
||||
" <th></th>\n", |
||||
" <th>answer_text</th>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th>question_text</th>\n", |
||||
" <th></th>\n", |
||||
" </tr>\n", |
||||
" </thead>\n", |
||||
" <tbody>\n", |
||||
" <tr>\n", |
||||
" <th>blad (aan een boom)</th>\n", |
||||
" <td>4</td>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th>vis</th>\n", |
||||
" <td>2</td>\n", |
||||
" </tr>\n", |
||||
" </tbody>\n", |
||||
"</table>\n", |
||||
"</div>" |
||||
], |
||||
"text/plain": [ |
||||
" answer_text\n", |
||||
"question_text \n", |
||||
"blad (aan een boom) 4\n", |
||||
"vis 2" |
||||
] |
||||
}, |
||||
"execution_count": 6, |
||||
"metadata": {}, |
||||
"output_type": "execute_result" |
||||
} |
||||
], |
||||
"source": [ |
||||
"answers_simple.groupby('question_text').agg({'answer_text': lambda x: len(set(x))})" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 7, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"centroids_example, pronunciations_example, counts_example = create_gabmap_dataframes(\n", |
||||
" regions_simple, answers_simple,\n", |
||||
" latitude_column='user_lat', longitude_column='user_lng',\n", |
||||
" word_column='question_text', pronunciation_column='answer_text',\n", |
||||
" region_name_property='name'\n", |
||||
")" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"## Resulting tables\n", |
||||
"\n", |
||||
"Stored as tab separated files for gabmap" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 8, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"text/html": [ |
||||
"<div>\n", |
||||
"<style scoped>\n", |
||||
" .dataframe tbody tr th:only-of-type {\n", |
||||
" vertical-align: middle;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe tbody tr th {\n", |
||||
" vertical-align: top;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe thead th {\n", |
||||
" text-align: right;\n", |
||||
" }\n", |
||||
"</style>\n", |
||||
"<table border=\"1\" class=\"dataframe\">\n", |
||||
" <thead>\n", |
||||
" <tr style=\"text-align: right;\">\n", |
||||
" <th></th>\n", |
||||
" <th>latitude</th>\n", |
||||
" <th>longitude</th>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th>#name</th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" </tr>\n", |
||||
" </thead>\n", |
||||
" <tbody>\n", |
||||
" <tr>\n", |
||||
" <th>Heerenveen</th>\n", |
||||
" <td>52.996076</td>\n", |
||||
" <td>5.977925</td>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th>Leeuwarden</th>\n", |
||||
" <td>53.169940</td>\n", |
||||
" <td>5.797613</td>\n", |
||||
" </tr>\n", |
||||
" </tbody>\n", |
||||
"</table>\n", |
||||
"</div>" |
||||
], |
||||
"text/plain": [ |
||||
" latitude longitude\n", |
||||
"#name \n", |
||||
"Heerenveen 52.996076 5.977925\n", |
||||
"Leeuwarden 53.169940 5.797613" |
||||
] |
||||
}, |
||||
"execution_count": 8, |
||||
"metadata": {}, |
||||
"output_type": "execute_result" |
||||
} |
||||
], |
||||
"source": [ |
||||
"centroids_example" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 9, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"text/html": [ |
||||
"<div>\n", |
||||
"<style scoped>\n", |
||||
" .dataframe tbody tr th:only-of-type {\n", |
||||
" vertical-align: middle;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe tbody tr th {\n", |
||||
" vertical-align: top;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe thead th {\n", |
||||
" text-align: right;\n", |
||||
" }\n", |
||||
"</style>\n", |
||||
"<table border=\"1\" class=\"dataframe\">\n", |
||||
" <thead>\n", |
||||
" <tr style=\"text-align: right;\">\n", |
||||
" <th></th>\n", |
||||
" <th>blad (aan een boom)</th>\n", |
||||
" <th>vis</th>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" </tr>\n", |
||||
" </thead>\n", |
||||
" <tbody>\n", |
||||
" <tr>\n", |
||||
" <th>Heerenveen</th>\n", |
||||
" <td>blet / blษt / blษd / blษ:t</td>\n", |
||||
" <td>fisk / fษชs</td>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th>Leeuwarden</th>\n", |
||||
" <td>blet / blษt / blษd / blษ:t</td>\n", |
||||
" <td>fisk / fษชs</td>\n", |
||||
" </tr>\n", |
||||
" </tbody>\n", |
||||
"</table>\n", |
||||
"</div>" |
||||
], |
||||
"text/plain": [ |
||||
" blad (aan een boom) vis\n", |
||||
" \n", |
||||
"Heerenveen blet / blษt / blษd / blษ:t fisk / fษชs\n", |
||||
"Leeuwarden blet / blษt / blษd / blษ:t fisk / fษชs" |
||||
] |
||||
}, |
||||
"execution_count": 9, |
||||
"metadata": {}, |
||||
"output_type": "execute_result" |
||||
} |
||||
], |
||||
"source": [ |
||||
"pronunciations_example" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 10, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"text/html": [ |
||||
"<div>\n", |
||||
"<style scoped>\n", |
||||
" .dataframe tbody tr th:only-of-type {\n", |
||||
" vertical-align: middle;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe tbody tr th {\n", |
||||
" vertical-align: top;\n", |
||||
" }\n", |
||||
"\n", |
||||
" .dataframe thead th {\n", |
||||
" text-align: right;\n", |
||||
" }\n", |
||||
"</style>\n", |
||||
"<table border=\"1\" class=\"dataframe\">\n", |
||||
" <thead>\n", |
||||
" <tr style=\"text-align: right;\">\n", |
||||
" <th></th>\n", |
||||
" <th>blad (aan een boom): blet</th>\n", |
||||
" <th>blad (aan een boom): blษt</th>\n", |
||||
" <th>blad (aan een boom): blษd</th>\n", |
||||
" <th>blad (aan een boom): blษ:t</th>\n", |
||||
" <th>vis: fisk</th>\n", |
||||
" <th>vis: fษชs</th>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" <th></th>\n", |
||||
" </tr>\n", |
||||
" </thead>\n", |
||||
" <tbody>\n", |
||||
" <tr>\n", |
||||
" <th>Heerenveen</th>\n", |
||||
" <td>31.654676</td>\n", |
||||
" <td>2.158273</td>\n", |
||||
" <td>2.158273</td>\n", |
||||
" <td>64.028777</td>\n", |
||||
" <td>52.517986</td>\n", |
||||
" <td>47.482014</td>\n", |
||||
" </tr>\n", |
||||
" <tr>\n", |
||||
" <th>Leeuwarden</th>\n", |
||||
" <td>7.865169</td>\n", |
||||
" <td>7.022472</td>\n", |
||||
" <td>8.707865</td>\n", |
||||
" <td>76.404494</td>\n", |
||||
" <td>75.000000</td>\n", |
||||
" <td>25.000000</td>\n", |
||||
" </tr>\n", |
||||
" </tbody>\n", |
||||
"</table>\n", |
||||
"</div>" |
||||
], |
||||
"text/plain": [ |
||||
" blad (aan een boom): blet blad (aan een boom): blษt \\\n", |
||||
" \n", |
||||
"Heerenveen 31.654676 2.158273 \n", |
||||
"Leeuwarden 7.865169 7.022472 \n", |
||||
"\n", |
||||
" blad (aan een boom): blษd blad (aan een boom): blษ:t vis: fisk \\\n", |
||||
" \n", |
||||
"Heerenveen 2.158273 64.028777 52.517986 \n", |
||||
"Leeuwarden 8.707865 76.404494 75.000000 \n", |
||||
"\n", |
||||
" vis: fษชs \n", |
||||
" \n", |
||||
"Heerenveen 47.482014 \n", |
||||
"Leeuwarden 25.000000 " |
||||
] |
||||
}, |
||||
"execution_count": 10, |
||||
"metadata": {}, |
||||
"output_type": "execute_result" |
||||
} |
||||
], |
||||
"source": [ |
||||
"counts_example" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 12, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"pronunciations_example.to_csv('../data/Pronunciations_example.gabmap.tsv', sep='\\t')\n", |
||||
"counts_example.to_csv('../data/Pronunciation_percentages_example.gabmap.tsv', sep='\\t')\n", |
||||
"centroids_example.to_csv('../data/Centroids_example.gabmap.tsv', sep='\\t', columns=['longitude', 'latitude'])\n", |
||||
"with open('../data/Gabmap_example.geojson', 'w') as f:\n", |
||||
" json.dump(regions_simple, f, indent=1)" |
||||
] |
||||
} |
||||
], |
||||
"metadata": { |
||||
"kernelspec": { |
||||
"display_name": "Python 3", |
||||
"language": "python", |
||||
"name": "python3" |
||||
}, |
||||
"language_info": { |
||||
"codemirror_mode": { |
||||
"name": "ipython", |
||||
"version": 3 |
||||
}, |
||||
"file_extension": ".py", |
||||
"mimetype": "text/x-python", |
||||
"name": "python", |
||||
"nbconvert_exporter": "python", |
||||
"pygments_lexer": "ipython3", |
||||
"version": "3.6.5" |
||||
} |
||||
}, |
||||
"nbformat": 4, |
||||
"nbformat_minor": 2 |
||||
} |
@ -1,157 +0,0 @@
@@ -1,157 +0,0 @@
|
||||
{ |
||||
"cells": [ |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"# Geographical pronunciation tables\n", |
||||
"\n", |
||||
"Creates gabmap files with region centroids, percentages and pronunciations for wijken in Friesland." |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 1, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"import sys\n", |
||||
"sys.path.append('..')\n", |
||||
"\n", |
||||
"import pandas\n", |
||||
"import MySQLdb\n", |
||||
"import json\n", |
||||
"import copy\n", |
||||
"\n", |
||||
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')\n", |
||||
"\n", |
||||
"from shapely.geometry import shape, Point\n", |
||||
"\n", |
||||
"from gabmap import create_gabmap_dataframes" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 2, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"with open('../data/Friesland_wijken.geojson') as f:\n", |
||||
" regions = json.load(f)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 3, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"# Answers to how participants state a word should be pronounced\n", |
||||
"\n", |
||||
"answers = pandas.read_sql('''\n", |
||||
"SELECT prediction_quiz_id, user_lat, user_lng, question_text, answer_text\n", |
||||
"FROM core_surveyresult as survey\n", |
||||
"INNER JOIN core_predictionquizresult as result ON survey.id = result.survey_result_id\n", |
||||
"INNER JOIN core_predictionquizresultquestionanswer as answer\n", |
||||
" ON result.id = answer.prediction_quiz_id\n", |
||||
"''', db)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 4, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"zero_latlng_questions = {\n", |
||||
" q\n", |
||||
" for q, row in answers.groupby('question_text').agg('std').iterrows()\n", |
||||
" if row['user_lat'] == 0 and row['user_lng'] == 0\n", |
||||
"}\n", |
||||
"answers_filtered = answers[answers['question_text'].map(lambda x: x not in zero_latlng_questions)].copy()" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 10, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"text/plain": [ |
||||
"array(['gegaan', 'avond', 'heel', 'dag', 'bij (insect)', 'sprak (toe)',\n", |
||||
" 'oog', 'armen (lichaamsdeel)', 'kaas', 'deurtje', 'koken',\n", |
||||
" 'borst (lichaamsdeel)', 'vis', 'zaterdag', 'trein', 'geel', 'tand',\n", |
||||
" 'gezet', 'blad (aan een boom)'], dtype=object)" |
||||
] |
||||
}, |
||||
"execution_count": 10, |
||||
"metadata": {}, |
||||
"output_type": "execute_result" |
||||
} |
||||
], |
||||
"source": [ |
||||
"answers_filtered['question_text'].unique()" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 6, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"answers_filtered['question_text'] = answers_filtered['question_text'].map(\n", |
||||
" lambda x: x.replace('\"', '').replace('*', ''))\n", |
||||
"\n", |
||||
"answers_filtered['answer_text'] = answers_filtered['answer_text'].map(\n", |
||||
" lambda x: x[x.find('('):x.find(')')][1:])" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 8, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"centroids, pronunciations, counts = create_gabmap_dataframes(\n", |
||||
" regions, answers_filtered,\n", |
||||
" latitude_column='user_lat', longitude_column='user_lng',\n", |
||||
" word_column='question_text', pronunciation_column='answer_text',\n", |
||||
" region_name_property='gemeente_en_wijk_naam'\n", |
||||
")" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 14, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"pronunciations.to_csv('../data/Friesland_wijken_pronunciations.gabmap.tsv', sep='\\t')\n", |
||||
"counts.to_csv('../data/Friesland_wijken_pronunciation_percentages.gabmap.tsv', sep='\\t')\n", |
||||
"centroids.to_csv('../data/Friesland_wijken_centroids.gabmap.tsv', sep='\\t', columns=['longitude', 'latitude'])" |
||||
] |
||||
} |
||||
], |
||||
"metadata": { |
||||
"kernelspec": { |
||||
"display_name": "Python 3", |
||||
"language": "python", |
||||
"name": "python3" |
||||
}, |
||||
"language_info": { |
||||
"codemirror_mode": { |
||||
"name": "ipython", |
||||
"version": 3 |
||||
}, |
||||
"file_extension": ".py", |
||||
"mimetype": "text/x-python", |
||||
"name": "python", |
||||
"nbconvert_exporter": "python", |
||||
"pygments_lexer": "ipython3", |
||||
"version": "3.6.5" |
||||
} |
||||
}, |
||||
"nbformat": 4, |
||||
"nbformat_minor": 2 |
||||
} |
@ -1,265 +0,0 @@
@@ -1,265 +0,0 @@
|
||||
{ |
||||
"cells": [ |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"# Group recordings in 4 Frisian dialect regions\n", |
||||
"\n", |
||||
" * Klaaifrysk\n", |
||||
" * Waldfrysk\n", |
||||
" * Sudwesthoeksk\n", |
||||
" * Noardhoeksk\n", |
||||
" \n", |
||||
"First run `Dialect Regions from image.ipynb`.\n", |
||||
"\n", |
||||
"" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 2, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"from math import floor\n", |
||||
"import json\n", |
||||
"import pandas\n", |
||||
"import MySQLdb\n", |
||||
"from collections import Counter\n", |
||||
"\n", |
||||
"from math import sqrt\n", |
||||
"import numpy as np\n", |
||||
"from shapely.geometry import shape, Point\n", |
||||
"from vincenty import vincenty\n", |
||||
"\n", |
||||
"from jupyter_progressbar import ProgressBar\n", |
||||
"\n", |
||||
"db = MySQLdb.connect(user='root', passwd='Nmmxhjgt1@', db='stimmen', charset='utf8')" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"# Input\n", |
||||
"\n", |
||||
"Load the GeoJSON with the dialect regions and create shapely shapes." |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 3, |
||||
"metadata": { |
||||
"scrolled": true |
||||
}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"with open('../data/fryslan_dialect_regions.geojson', 'r') as f:\n", |
||||
" geojson = json.load(f)\n", |
||||
"\n", |
||||
"dialect_regions = [region['properties']['dialect'] for region in geojson['features']]" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 4, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"shapes = {\n", |
||||
" feature['properties']['dialect']: shape(feature['geometry'])\n", |
||||
" for feature in geojson['features']\n", |
||||
"}\n", |
||||
"\n", |
||||
"def regions_for(coordinate):\n", |
||||
" regions = {\n", |
||||
" region_name\n", |
||||
" for region_name, shape in shapes.items()\n", |
||||
" if shape.contains(Point(*coordinate))\n", |
||||
" }\n", |
||||
" return regions\n", |
||||
"\n", |
||||
"def distance_to_shape(shape, longitude, latitude):\n", |
||||
" ext = shape.exterior\n", |
||||
" p = ext.interpolate(ext.project(Point(longitude, latitude)))\n", |
||||
" return vincenty((latitude, longitude), (p.y, p.x))" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "markdown", |
||||
"metadata": {}, |
||||
"source": [ |
||||
"# Query and process\n", |
||||
"\n", |
||||
"Query all picture game and free speech recordings and assign the dialect region." |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 5, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"def dialect_regions_and_distance(data):\n", |
||||
" return[\n", |
||||
" {\n", |
||||
" 'dialects': [\n", |
||||
" {\n", |
||||
" 'dialect': dialect,\n", |
||||
" 'boundary_distance': distance_to_shape(shapes[dialect], longitude, latitude),\n", |
||||
" }\n", |
||||
" for dialect in regions_for((longitude, latitude))\n", |
||||
" ],\n", |
||||
" 'filename': filename,\n", |
||||
" }\n", |
||||
" for filename, (latitude, longitude) in ProgressBar(\n", |
||||
" data[['latitude', 'longitude']].iterrows(),\n", |
||||
" size=len(data)\n", |
||||
" )\n", |
||||
" ]" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 6, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"picture_games = pandas.read_sql('''\n", |
||||
"SELECT language.name as language, item.name as picture,\n", |
||||
" survey.user_lat as latitude, survey.user_lng as longitude,\n", |
||||
" survey.area_name as area, survey.country_name as country,\n", |
||||
" result.recording as filename,\n", |
||||
" result.submitted_at as date\n", |
||||
"FROM core_surveyresult as survey\n", |
||||
"INNER JOIN core_picturegameresult as result ON survey.id = result.survey_result_id\n", |
||||
"INNER JOIN core_language as language ON language.id = result.language_id\n", |
||||
"INNER JOIN core_picturegameitem as item\n", |
||||
" ON result.picture_game_item_id = item.id\n", |
||||
"''', db)\n", |
||||
"picture_games.set_index('filename', inplace=True)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 7, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"application/vnd.jupyter.widget-view+json": { |
||||
"model_id": "5825449a737b4fcab38a4f4ac2adfd87", |
||||
"version_major": 2, |
||||
"version_minor": 0 |
||||
}, |
||||
"text/plain": [ |
||||
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…" |
||||
] |
||||
}, |
||||
"metadata": {}, |
||||
"output_type": "display_data" |
||||
} |
||||
], |
||||
"source": [ |
||||
"dialect_region_per_picture_game = dialect_regions_and_distance(picture_games)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 8, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"df = pandas.DataFrame([\n", |
||||
" [r['filename'], r['dialects'][0]['dialect'], r['dialects'][0]['boundary_distance']]\n", |
||||
" for r in dialect_region_per_picture_game\n", |
||||
" if len(r['dialects']) == 1\n", |
||||
"], columns = ['filename', 'dialect', 'boundary_distance'])\n", |
||||
"\n", |
||||
"df.to_excel('../data/picture_game_recordings_by_dialect.xlsx')\n", |
||||
"df.to_csv('../data/picture_game_recordings_by_dialect.csv')" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 9, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"free_speech_games = pandas.read_sql('''\n", |
||||
"SELECT language.name as language,\n", |
||||
" survey.user_lat as latitude, survey.user_lng as longitude,\n", |
||||
" survey.area_name as area, survey.country_name as country,\n", |
||||
" result.recording as filename,\n", |
||||
" result.submitted_at as date\n", |
||||
"FROM core_surveyresult as survey\n", |
||||
"INNER JOIN core_freespeechresult as result ON survey.id = result.survey_result_id\n", |
||||
"INNER JOIN core_language as language ON language.id = result.language_id\n", |
||||
"''', db)\n", |
||||
"free_speech_games.set_index('filename', inplace=True)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 10, |
||||
"metadata": {}, |
||||
"outputs": [ |
||||
{ |
||||
"data": { |
||||
"application/vnd.jupyter.widget-view+json": { |
||||
"model_id": "8afad9f71e544658b554b828932d7769", |
||||
"version_major": 2, |
||||
"version_minor": 0 |
||||
}, |
||||
"text/plain": [ |
||||
"VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='<b>0</b>s passed', placeholder='0…" |
||||
] |
||||
}, |
||||
"metadata": {}, |
||||
"output_type": "display_data" |
||||
} |
||||
], |
||||
"source": [ |
||||
"dialect_region_per_free_speech = dialect_regions_and_distance(free_speech_games)" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 11, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"df = pandas.DataFrame([\n", |
||||
" [r['filename'], r['dialects'][0]['dialect'], r['dialects'][0]['boundary_distance']]\n", |
||||
" for r in dialect_region_per_free_speech\n", |
||||
" if len(r['dialects']) == 1\n", |
||||
"], columns = ['filename', 'dialect', 'boundary_distance'])\n", |
||||
"\n", |
||||
"df.to_excel('../data/free_speech_recordings_by_dialect.xlsx')\n", |
||||
"df.to_csv('../data/free_speech_recordings_by_dialect.csv')" |
||||
] |
||||
} |
||||
], |
||||
"metadata": { |
||||
"kernelspec": { |
||||
"display_name": "Python 3", |
||||
"language": "python", |
||||
"name": "python3" |
||||
}, |
||||
"language_info": { |
||||
"codemirror_mode": { |
||||
"name": "ipython", |
||||
"version": 3 |
||||
}, |
||||
"file_extension": ".py", |
||||
"mimetype": "text/x-python", |
||||
"name": "python", |
||||
"nbconvert_exporter": "python", |
||||
"pygments_lexer": "ipython3", |
||||
"version": "3.6.5" |
||||
} |
||||
}, |
||||
"nbformat": 4, |
||||
"nbformat_minor": 1 |
||||
} |
File diff suppressed because one or more lines are too long