Uživatel:Stardust85/statistika
Skočit na navigaci
Skočit na vyhledávání
Kód wikitabulky vygenerován skriptem - počítá pouze existující kategorie. Vznikne-li nová kategorie, musí se sem ručně doplnit (nebo spustit znovu skript).
tabulka[editovat]
jazyk / druh | subst. | adjekt. | zájm. | čísl. | slovesa | příslovce | předložky | spojky | částice | citosl. | celkem |
---|---|---|---|---|---|---|---|---|---|---|---|
Afrikánština | 29 | 4 | - | - | - | - | - | - | - | - | 33 |
Angličtina | 7 949 | 2 170 | 49 | 38 | 2 104 | 257 | 48 | 35 | 5 | 54 | 12709 |
Arabština | 132 | - | - | - | - | - | - | - | - | - | 132 |
Asturština | 4 | - | - | - | - | - | - | - | - | - | 4 |
Běloruština | 26 | - | - | - | - | - | - | 1 | - | - | 27 |
Bosenština | 9 | - | - | - | - | - | - | - | - | - | 9 |
Bretonština | 18 | - | - | - | - | - | - | - | - | - | 18 |
Bulharština | 36 | - | - | - | - | - | - | 2 | - | - | 38 |
Čečenština | 10 | - | - | - | - | - | - | - | - | - | 10 |
Čeština | 23 656 | 9 099 | 146 | 199 | 3 225 | 1 257 | 94 | 130 | 118 | 344 | 38268 |
Dánština | 139 | 46 | 3 | 23 | 6 | - | - | - | - | - | 217 |
Esperanto | 634 | - | 9 | - | - | 12 | - | - | - | - | 655 |
Estonština | 125 | 8 | - | - | - | - | - | - | - | - | 133 |
Fidžijština | - | - | - | - | 1 | - | - | - | - | - | 1 |
Finština | 1 797 | 313 | - | - | - | 116 | - | - | - | - | 2226 |
Francouzština | 6 325 | 3 208 | 56 | 34 | 2 169 | 196 | - | 19 | - | 45 | 12052 |
Fríština | - | 1 | - | - | - | - | - | - | - | - | 1 |
Galicijština | 47 | - | - | - | - | - | - | - | - | - | 47 |
Hebrejština | 610 | - | - | - | - | - | - | - | - | - | 610 |
Chorvatština | 53 | - | - | - | - | - | - | - | - | - | 53 |
Interlingua | - | - | - | - | - | - | - | - | - | - | 0 |
Irština | 306 | - | - | - | - | - | - | - | - | - | 306 |
Islandština | 146 | - | - | 2 | - | - | - | - | - | - | 148 |
Italština | 1 828 | 251 | 13 | - | 163 | 37 | - | 11 | - | - | 2303 |
Japonština | 2 639 | - | - | - | - | - | - | - | - | - | 2639 |
Kašubština | 133 | - | - | - | - | - | - | - | - | - | 133 |
Katalánština | 143 | 9 | - | - | - | - | - | - | - | - | 152 |
Kečuánština | 5 | - | - | - | - | - | - | - | - | - | 5 |
Krymská tatarština | 9 | - | - | - | - | - | - | - | - | - | 9 |
Latina | 1 417 | 456 | 27 | 17 | 399 | - | - | 26 | - | - | 2342 |
Litevština | 222 | - | - | - | - | - | - | - | 5 | - | 227 |
Lotyština | 128 | - | - | - | - | - | - | - | - | - | 128 |
Maďarština | 539 | 89 | - | - | 82 | - | - | - | - | - | 710 |
Makedonština | - | - | - | - | - | - | - | 1 | - | - | 1 |
Němčina | 8 637 | 1 928 | 44 | 90 | 820 | 262 | 40 | 36 | 9 | - | 11866 |
Nizozemština | 605 | 103 | 15 | - | 100 | - | - | - | - | - | 823 |
Norština | 8 | 2 | 2 | 2 | - | - | - | - | - | - | 14 |
Okcitánština | 85 | - | - | - | - | - | - | - | - | - | 85 |
Oshiwambo | 0 | - | 0 | - | 0 | - | - | - | - | - | 0 |
Polština | 4 040 | 211 | 23 | - | - | 36 | - | 14 | 6 | - | 4330 |
Portugalština | 369 | - | - | - | 55 | - | - | 6 | - | - | 430 |
Romština | 182 | - | 11 | - | - | - | - | - | - | - | 193 |
Rumunština | 84 | - | - | - | - | - | 2 | - | - | - | 86 |
Ruština | 1 600 | 1 354 | 20 | - | 283 | - | - | 7 | - | - | 3264 |
Řečtina | 982 | - | - | - | - | - | - | - | - | - | 982 |
Sanskrt | 29 | - | - | - | - | - | - | - | - | - | 29 |
Skotská gaelština | 15 | - | - | - | - | - | - | - | - | - | 15 |
Slovenština | 1 645 | 787 | 31 | 12 | 163 | 78 | 19 | 17 | - | - | 2752 |
Slovinština | 123 | 8 | - | - | - | 9 | - | - | - | - | 140 |
Slovio | 0 | - | - | - | - | - | - | 0 | - | - | 0 |
Srbština | 1 598 | - | - | - | - | - | - | 3 | - | - | 1601 |
Stará angličtina | 29 | - | - | - | - | - | - | 3 | - | - | 32 |
Staroslověnština | - | - | - | - | - | - | - | 2 | - | - | 2 |
Svahilština | 90 | - | - | - | - | - | - | - | - | - | 90 |
Španělština | 1 631 | 159 | 42 | 53 | 200 | 29 | 9 | 9 | 3 | - | 2135 |
Švédština | 699 | 134 | 49 | 69 | 249 | 93 | 34 | 24 | 8 | 14 | 1373 |
Tádžičtina | - | - | - | - | - | - | 1 | - | - | - | 1 |
Tofalarština | - | - | 1 | - | - | - | - | - | - | - | 1 |
Turečtina | 430 | - | - | - | - | - | - | - | - | - | 430 |
Ukrajinština | 254 | - | - | - | - | - | - | 2 | - | - | 256 |
Velština | 20 | - | 4 | - | - | - | - | - | - | - | 24 |
kód skriptu[editovat]
#!/bin/bash
# Generates table with detailed statistics about languages for cs.wiktionary.org
# see the result at cs.wiktionary.org/wiki/Uživatel:Stardust85/statistika
# author: Michel Samia (m.samia at seznam.cz)
# usage: $ ./newstats.sh > wikitable.txt
# WARNING: this script loads the Wikimedia servers with one request per language category
# (about 60 GETs as of mid-2009), so don't run it too often
LANG= # because the behaviour of sorting, grepping and other tools is locale-dependent
DOMAIN="http://cs.wiktionary.org/wiki"
# Word-type category keywords, one per table column; kept as a "\n"-separated
# string because the table generator expands it with `echo -e`.
TYPES="substantiva\nadjektiva\nzájmena\nčíslovky\nslovesa\npříslovce\npředložky\nspojky\nčástice\ncitoslovce"

# -p: a directory left over from a previous run is not an error.
mkdir -p langs || { echo "ERROR: cannot create directory langs" >&2; exit 1; }

i=0 # counter
echo -e "\nSTEP 1: Downloading categories" >&2

# STEP 1: for every language listed on the "Jazykové kategorie" page, download
# the language's category page and store the names of its subcategories, one
# per line, in langs/<language name> (e.g. langs/Afrikánština).
#
# Get list of language categories
wget -q -O- "${DOMAIN}/Kategorie:Jazykov%C3%A9_kategorie" | grep '<span class="CategoryTreeBullet">' |
{
    echo "Next language..." >&2
    while read -r line
    do
        # page containing categories by types of words of given language (nouns, verbs...)
        # (the HTML line is always passed quoted: it contains "[...]" sequences
        # that the shell would otherwise treat as glob patterns)
        langHref="${DOMAIN}$(printf '%s\n' "$line" | sed -e 's/^.*wiki//' -e 's/".*$//')"
        # name of the language
        langName="$(printf '%s\n' "$line" | cut -d'>' -f 12 | cut -d'<' -f1)"

        # A name that is empty or contains a slash cannot be used as a file name.
        if [[ -z "$langName" || "$langName" == */* ]]
        then
            echo "WARNING: skipping unusable language name '$langName'" >&2
            continue
        fi

        # create (or truncate) the per-language file
        : > "langs/${langName}"
        echo "$i $langName" >&2
        i=$(( i + 1 ))

        # we already have the URL of the language category; now extract the
        # suitable subcategories from it and save them line by line to
        # langs/$langName, e.g. langs/Afrikánština
        wget -q -O- "$langHref" | grep '<li><div class="CategoryTreeSection"><div class="CategoryTreeItem">' |
        while read -r line2
        do
            # strip the HTML tags, the "[+]" toggle and the trailing "(... )" counts
            typeHref="$(printf '%s\n' "$line2" | sed -e 's/<[^>]*>//g' -e 's/\[.*\]//' -e 's/(.*$//')"
            echo "$typeHref" >> "langs/$langName"
        done

        # be polite to the servers: pause between languages
        echo -n "sleeping..." >&2
        sleep 1
        echo "OK" >&2
    done
}
######## generating the table ######
# STEP 2: now we have all the data and we can generate the table.
# For every file in langs/ (one per language, holding that language's category
# names) one table row is written to stdout:
#   - a cell with the language name,
#   - one cell per word type listed in TYPES: a link to the matching category
#     with its {{PAGESINCATEGORY}} count, or "-" when no category matches,
#   - a bold total built as an {{#expr:...}} sum of the raw counts,
#   - the row separator " |-".
# Progress messages go to stderr.
echo -e "\nSTEP 2: Creating table" >&2

# Without this check a failed cd would build the table from whatever files
# happen to be in the current directory.
cd langs/ || { echo "ERROR: cannot enter directory langs/" >&2; exit 1; }

i=0
for file in *
do
    # An empty directory leaves the glob unexpanded as a literal '*'.
    [[ -f "$file" ]] || continue

    echo "$i: $file" >&2
    i=$(( i + 1 ))
    echo " | $file"

    # Terms of the total, collected while the cells are printed so that each
    # type is looked up only once. The loop reads from a process substitution
    # (not a pipe) so that sumTerms is still set after the loop.
    sumTerms=""
    while read -r druh
    do
        completeDruh="$(grep -F -- "$druh" "$file" | sed 's/^ //')"
        if [[ -z "$completeDruh" ]]
        then
            echo ' | -'
        else
            echo " | [[:Kategorie:$completeDruh| {{PAGESINCATEGORY:$completeDruh}}]]"
            if [[ -z "$sumTerms" ]]
            then
                sumTerms="{{PAGESINCATEGORY:$completeDruh|R}}"
            else
                sumTerms+=$'\n'"+{{PAGESINCATEGORY:$completeDruh|R}}"
            fi
        fi
    done < <(echo -e "${TYPES}")

    echo " | '''{{#expr:"
    # A language without any matching category gets an explicit 0 instead of
    # an empty expression.
    echo "${sumTerms:-0}"
    echo "}}'''"
    echo " |-"
done