The Languages of the World
Distribution of language status for the top 10 countries with the most languages.
By Manish Datt
TidyTuesday dataset of 2025-12-23
Languages
Endangered Status
Families
Combined Data
Plotting code
<link href="https://unpkg.com/tabulator-tables@6.3.1/dist/css/tabulator.min.css" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/d3@7"></script>
<script src="https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6.11/dist/plot.umd.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/htl@0.3.1/dist/htl.min.js"></script>
<script type="text/javascript" src="https://unpkg.com/tabulator-tables@6.3.1/dist/js/tabulator.min.js"></script>
<script src="https://unpkg.com/papaparse@5.4.1/papaparse.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/danfojs@1.1.2/lib/bundle.js"></script>
<script type="module">
// Import the ES module version of i18n-iso-countries
import countries from 'https://cdn.skypack.dev/i18n-iso-countries';
/**
 * Publish `window.getCountryName` and load the English locale used by
 * `i18n-iso-countries` to resolve country codes to names.
 *
 * The helper is installed before the first `await`, so it exists as soon as
 * `init()` has been called. Other scripts on the page call it once their own
 * data has loaded, which may happen before the locale request finishes; in
 * that window (or if the request fails) the helper falls back to returning
 * the code it was given instead of being undefined.
 *
 * @returns {Promise<void>} Settles once the locale has been registered or the
 *   failure has been logged; it does not reject.
 */
async function init() {
  // Make the getCountryName function available globally, synchronously.
  window.getCountryName = function (code) {
    // Tolerate null/undefined/non-string codes instead of throwing.
    const text = code == null ? '' : String(code);
    // A falsy lookup result (unknown code, locale not registered yet) falls
    // back to the code itself.
    return countries.getName(text.toUpperCase(), 'en') || text;
  };
  try {
    // Fetch the English language data
    const response = await fetch('https://cdn.jsdelivr.net/npm/i18n-iso-countries@7.11.0/langs/en.json');
    if (!response.ok) {
      throw new Error(`Locale request failed with HTTP ${response.status}`);
    }
    const data = await response.json();
    // Register the English locale
    countries.registerLocale(data);
  } catch (error) {
    // Best effort: the page still works, showing raw country codes.
    console.error('Could not load English country names; falling back to country codes:', error);
  }
}
init();
</script>
<div class="mx-auto max-w-6xl">
<h3 class="mb-4 text-xl font-bold text-slate-800">Languages</h3>
<div id="languages-table" class="mb-8 overflow-x-auto"></div>
<div class="mb-8 grid grid-cols-1 gap-8 md:grid-cols-2">
<div class="flex flex-col">
<h3 class="mb-4 text-xl font-bold text-slate-800">Endangered Status</h3>
<div id="endangered-table" class="overflow-x-auto"></div>
</div>
<div class="flex flex-col">
<h3 class="mb-4 text-xl font-bold text-slate-800">Families</h3>
<div id="families-table" class="overflow-x-auto"></div>
</div>
</div>
<h3 class="mb-4 text-xl font-bold text-slate-800">Combined Data</h3>
<div id="combined-table" class="mb-12 overflow-x-auto"></div>
<div class="rounded-lg border border-slate-100 bg-slate-50/50 p-6">
<div id="stacked-barplot" class="w-full"></div>
<div id="status-filters" class="mt-4 flex flex-wrap gap-2"></div>
</div>
</div>
<script>
window.addEventListener('load', () => {
Promise.all([
fetch('endangered_status.csv').then(r => r.text()),
fetch('families.csv').then(r => r.text()),
fetch('languages.csv').then(r => r.text())
])
.then(([endText, famText, langText]) => {
// Parse a CSV string into an array of row objects keyed by the header row.
// skipEmptyLines keeps a trailing newline in the file from producing an extra
// blank record, which would otherwise show up as an empty table row and as a
// bogus '' / undefined key in the lookup maps below.
const parseCsv = (text) => Papa.parse(text, {header: true, skipEmptyLines: true}).data;
const endData = parseCsv(endText);
const famData = parseCsv(famText);
const langData = parseCsv(langText);
// Create maps for merging: both are keyed by the `id` column of their file.
const endangeredMap = new Map(endData.map(row => [row.id, row]));
const familiesMap = new Map(famData.map(row => [row.id, row]));
// Left-join every language with its endangered status (matched on the
// language id) and its family (matched on family_id). Languages without a
// match keep empty strings for the joined columns.
const combinedData = langData.map(lang => {
  const end = endangeredMap.get(lang.id);
  const fam = familiesMap.get(lang.family_id);
  const {id, name, latitude, longitude, iso639P3code, countries, glottocode, macroarea, family_id} = lang;
  return {
    id,
    name,
    latitude,
    longitude,
    iso639P3code,
    countries,
    glottocode,
    macroarea,
    family_id,
    family: fam ? fam.family : '',
    status_code: end ? end.status_code : '',
    status_label: end ? end.status_label : ''
  };
});
// Create tables
// Every table uses the same layout and local pagination (5 rows per page);
// only the target element, the rows and the column list differ.
const createTable = (selector, data, columns) => new Tabulator(selector, {
  data,
  columns,
  layout: "fitColumns",
  pagination: "local",
  paginationSize: 5
});
// Columns of languages.csv. Returned from a function so each table receives
// its own definition objects.
const languageColumns = () => [
  {title: "ID", field: "id"},
  {title: "Name", field: "name"},
  {title: "Latitude", field: "latitude"},
  {title: "Longitude", field: "longitude"},
  {title: "ISO 639-3 Code", field: "iso639P3code"},
  {title: "Countries", field: "countries"},
  {title: "Glottocode", field: "glottocode"},
  {title: "Macroarea", field: "macroarea"},
  {title: "Family ID", field: "family_id"}
];
createTable("#endangered-table", endData, [
  {title: "ID", field: "id"},
  {title: "Status Code", field: "status_code"},
  {title: "Status Label", field: "status_label"}
]);
createTable("#families-table", famData, [
  {title: "ID", field: "id"},
  {title: "Family", field: "family"}
]);
createTable("#languages-table", langData, languageColumns());
// The combined table shows the language columns followed by the joined ones.
createTable("#combined-table", combinedData, [
  ...languageColumns(),
  {title: "Family", field: "family"},
  {title: "Status Code", field: "status_code"},
  {title: "Status Label", field: "status_label"}
]);
// Process data for status grouping using danfojs.
// A language can list several countries separated by ';', so emit one row per
// (language, country) pair before grouping.
const explodedData = [];
for (const row of combinedData) {
  const codes = row.countries ? row.countries.split(';').map(c => c.trim()).filter(c => c) : [];
  for (const country of codes) {
    explodedData.push({
      country,
      status_code: row.status_code,
      status_label: row.status_label,
      language: row.name
    });
  }
}
// Create new dataframe with exploded countries
const explodedDf = new dfd.DataFrame(explodedData);
// Group by country and status code, then count languages
const grouped = explodedDf.groupby(['country', 'status_code']);
const result = grouped.count().resetIndex();
// The count column might be named 'language' or something else; rename it to
// 'count' if needed. JavaScript has no keyword arguments: the previous
// `inplace=true` assigned to an implicit global and passed a bare `true`, so
// the options object danfo.js expects was never supplied. Pass it explicitly.
if (result.columns.includes('language')) {
  result.rename({language: 'count'}, {axis: 1, inplace: true});
} else if (result.columns.includes('status_label')) {
  result.rename({status_label: 'count'}, {axis: 1, inplace: true});
}
// Sort by the first non-key column (a count), descending. Later code reads the
// rows by position, so only the row order is affected.
const countColumn = result.columns.find(col => col !== 'country' && col !== 'status_code');
if (countColumn) {
  result.sortValues(countColumn, {ascending: false, inplace: true});
}
// Reshape the grouped rows into the record form Tabulator expects. Rows are
// read by position: [country code, status code, status-label count, language count].
// NOTE(review): neither tableData nor columnDefinitions is referenced anywhere
// else in the visible script - confirm whether a table was meant to render them.
const tableData = result.values.map(([countryCode, statusCode, statusLabelCount, languageCount], index) => ({
  id: index,
  country: getCountryName(countryCode),
  status_code: statusCode,
  status_label_count: statusLabelCount,
  language_count: languageCount
}));
// Column definitions matching the record fields above (the synthetic id is
// not given a column).
const columnDefinitions = [
  ["Country", "country"],
  ["Status Code", "status_code"],
  ["Status Label Count", "status_label_count"],
  ["Language Count", "language_count"]
].map(([title, field]) => ({title, field}));
// Process data for stacked bar plot.
// Status codes that may appear in the chart. Membership is tested against this
// set because comparing strings with >= / <= is lexicographic and would also
// accept values such as '10' or '5x'.
const allowedStatuses = new Set(['0', '1', '2', '3', '4', '5', '6']);
// Blank or missing status codes are reported as "0"; everything else is
// compared as a trimmed string so numeric values are handled too.
const normalizeStatus = (status) => {
  const text = status == null ? '' : String(status).trim();
  return text === '' ? '0' : text;
};
// Sort grouped data by status code first (on a copy; result.values is left alone).
const sortedResult = [...result.values].sort((a, b) => {
  const left = a[1] == null ? '' : String(a[1]);
  const right = b[1] == null ? '' : String(b[1]);
  return left.localeCompare(right);
});
// Total language count per country code, over all status codes.
const countryTotals = new Map();
for (const row of result.values) {
  const country = row[0];
  const langCount = row[3]; // language_count is at index 3
  countryTotals.set(country, (countryTotals.get(country) || 0) + langCount);
}
// Get top 10 countries
const top10Countries = Array.from(countryTotals.entries())
  .sort((a, b) => b[1] - a[1])
  .slice(0, 10)
  .map(([country]) => country);
const top10Lookup = new Set(top10Countries);
// Keep only rows for the top 10 countries with a status code of 0-6, and
// convert them to {country name, status, count} records.
const stackedData = [];
for (const row of sortedResult) {
  const country = row[0];
  if (!top10Lookup.has(country)) {
    continue;
  }
  const processedStatus = normalizeStatus(row[1]);
  if (!allowedStatuses.has(processedStatus)) {
    continue;
  }
  stackedData.push({
    country: getCountryName(country),
    status: processedStatus,
    count: row[3]
  });
}
// Prepare data for horizontal stacked bar plot.
// Collect the per-status counts of each country. Counts are accumulated
// rather than assigned: blank status codes are reported as "0" upstream, so
// more than one input row can land on the same (country, status) pair and a
// plain assignment would silently drop all but the last one.
const countryData = new Map();
for (const item of stackedData) {
  let entry = countryData.get(item.country);
  if (!entry) {
    entry = {country: item.country, statusCounts: {}};
    countryData.set(item.country, entry);
  }
  entry.statusCounts[item.status] = (entry.statusCounts[item.status] || 0) + item.count;
}
// Total per country, computed once and reused for both sorting and labels.
const barCountryTotals = {};
countryData.forEach(entry => {
  barCountryTotals[entry.country] = Object.values(entry.statusCounts).reduce((sum, count) => sum + count, 0);
});
// Countries ordered by total count (descending); this is the y-axis order.
const sortedCountries = Array.from(countryData.values())
  .sort((a, b) => barCountryTotals[b.country] - barCountryTotals[a.country]);
// Flatten to one {country, status, count} record per bar segment.
const stackedPlotData = [];
for (const entry of sortedCountries) {
  for (const [status, count] of Object.entries(entry.statusCounts)) {
    stackedPlotData.push({country: entry.country, status, count});
  }
}
// One colour per status code; the position in the palette is the code (0-6).
const statusPalette = ["#BDBDBD", "#2E7D32", "#9CCC65", "#FBC02D", "#EF6C00", "#C62828", "#6D5C6D"];
const statusColors = {};
statusPalette.forEach((hex, code) => {
  statusColors[code] = hex;
});
// Map each status code found in the endangered-status rows to its label.
// Rows with an empty code, or a code outside 0-6, are ignored; a missing
// label is shown as "NA".
const statusLabels = {};
for (const {status_code: code, status_label: label} of endData) {
  if (!code || !(code >= 0 && code <= 6)) {
    continue;
  }
  statusLabels[code] = label || "NA";
}
// Status 0 always reads "NA", whatever the data said.
statusLabels["0"] = "NA";
// Get unique status codes and create checkboxes.
// Sorted so the checkboxes always appear in status-code order instead of the
// order in which codes happen to occur in the plot data.
const uniqueStatuses = [...new Set(stackedPlotData.map(d => d.status))].sort();
// statusFilters[status] === true means the status is currently shown.
const statusFilters = {};
uniqueStatuses.forEach(status => {
  statusFilters[status] = true;
});
const filterContainer = document.getElementById('status-filters');
// Build a <label> wrapping a pre-checked checkbox followed by its caption.
const createFilterOption = (caption) => {
  const label = document.createElement('label');
  label.style.marginRight = '15px';
  label.style.display = 'inline-block';
  const checkbox = document.createElement('input');
  checkbox.type = 'checkbox';
  checkbox.checked = true;
  label.appendChild(checkbox);
  label.appendChild(document.createTextNode(caption));
  return {label, checkbox};
};
// Checkbox elements by status code. Holding the references avoids the
// document-wide `input[value="..."]` lookup, which could match any other
// input on the page that happens to carry the same value.
const statusCheckboxes = new Map();
// Create "All" checkbox
const {label: allLabel, checkbox: allCheckbox} = createFilterOption('All');
allLabel.style.fontWeight = 'bold';
allCheckbox.id = 'all-status';
allCheckbox.addEventListener('change', () => {
  const isChecked = allCheckbox.checked;
  statusCheckboxes.forEach((checkbox, status) => {
    statusFilters[status] = isChecked;
    checkbox.checked = isChecked;
  });
  updateStackedBarPlot();
});
filterContainer.appendChild(allLabel);
// Create individual status checkboxes
uniqueStatuses.forEach(status => {
  const {label, checkbox} = createFilterOption(statusLabels[status] || `Status ${status}`);
  checkbox.value = status;
  // Apply the status colour to the label text so it doubles as a legend.
  label.style.color = statusColors[status] || "#000000";
  checkbox.addEventListener('change', () => {
    statusFilters[status] = checkbox.checked;
    // "All" is ticked only while every individual status is ticked.
    allCheckbox.checked = uniqueStatuses.every(s => statusFilters[s]);
    updateStackedBarPlot();
  });
  statusCheckboxes.set(status, checkbox);
  filterContainer.appendChild(label);
});
// Function to update stacked bar plot based on filters
/**
 * Rebuild the stacked bar plot from the statuses currently enabled in
 * `statusFilters` and swap it into the `#stacked-barplot` element.
 *
 * Reads `stackedPlotData`, `statusFilters`, `sortedCountries` and
 * `statusColors` from the enclosing scope. Each country gets exactly one
 * total label, placed at the end of its bar; countries left with no visible
 * segment get no label.
 */
function updateStackedBarPlot() {
  const filteredData = stackedPlotData.filter(d => statusFilters[d.status]);
  // Recalculate totals over the visible segments only.
  const filteredTotals = new Map();
  filteredData.forEach(d => {
    filteredTotals.set(d.country, (filteredTotals.get(d.country) || 0) + d.count);
  });
  // One label record per country. Deriving the labels from the segment rows
  // would draw the same text once per segment, stacked on top of itself.
  const totalLabels = Array.from(filteredTotals, ([country, count]) => ({country, count}));
  // Update plot
  const updatedPlot = Plot.plot({
    title: htl.html`<span class="fs-3">Status distribution for top 10 countries with most languages</span>`,
    marks: [
      Plot.barX(filteredData, {x: "count", y: "country", fill: "status"}),
      Plot.text(totalLabels, {x: "count", y: "country", text: "count", textAnchor: "start", dx: 8, fill: "black", fontWeight: "bold", fontSize: 12})
    ],
    x: {label: null, tickFormat: null, ticks: null, tickSize: 0, axis: null},
    // Fixed domain keeps every country in place even when its bar is empty.
    y: {label: null, domain: sortedCountries.map(d => d.country), ticks: null, tickSize: 0},
    color: {
      type: "ordinal",
      domain: Object.keys(statusColors),
      range: Object.values(statusColors)
    },
    width: 700,
    height: 400,
    marginLeft: 170, // Increase left margin for longer country names
    marginRight: 30, // Reduce right margin to shift plot right
    style: {
      fontSize: "14px"
    }
  });
  // Replace old plot with new one
  const plotContainer = document.getElementById('stacked-barplot');
  plotContainer.innerHTML = '';
  plotContainer.appendChild(updatedPlot);
}
// Create initial stacked bar plot.
// Every status filter starts enabled, so the first render is simply the
// filtered render with nothing hidden. Going through updateStackedBarPlot()
// keeps a single copy of the plot configuration instead of two that can
// drift apart.
updateStackedBarPlot();
})
.catch(error => console.error('Error loading CSV files:', error));
});
</script>