Skip to content

Commit bca5ddd

Browse files
committed
ICU instead of unholy combination of langtag and isolang
1 parent a9e8c05 commit bca5ddd

File tree

12 files changed

+625
-432
lines changed

12 files changed

+625
-432
lines changed

build/Cargo.lock

Lines changed: 439 additions & 322 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

build/Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ celes = "2.5.0"
99
clap = { version = "4.5.23", features = ["derive"] }
1010
dunce = "1.0.5"
1111
eyre = "0.6.12"
12-
indexmap = "2.7.0"
12+
icu = { version = "1.5.0", features = ["experimental", "serde"] }
13+
icu_provider = { version = "1.5.0", features = ["sync"] }
14+
indexmap = "2.7.1"
1315
isbn = { version = "0.4.0" }
14-
isolang = { version = "2.4.0", features = ["serde", "local_names"] }
1516
itertools = "0.14.0"
16-
langtag = { version = "0.4.0", features = ["serde"] }
1717
markdown = "1.0.0-alpha"
18-
maud = "0.26.0"
18+
maud = "0.27.0"
1919
notify = "8.0.0"
2020
notify-debouncer-full = "0.5.0"
2121
num-format = "0.4.4"

build/src/bib_render.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::{
1010
LString, MagazineArticle, NewspaperArticle, NumberOrString, Pagination, Periodical, Person,
1111
Reference, Thesis, WebPage,
1212
},
13+
intl::INTL,
1314
nvec::OneOrMore,
1415
};
1516

@@ -288,11 +289,11 @@ fn render_title(r: &Reference) -> Markup {
288289
};
289290

290291
let additional_suffix = r.common().language.as_ref().map(|lang| {
291-
html! {
292-
"text in " (lang.to_name())
293-
meta itemprop="inLanguage" content=(lang.to_639_1().unwrap_or_else(|| lang.to_639_3())) ;
294-
}
295-
});
292+
html! {
293+
"text in " (INTL.english_name(lang.language).unwrap())
294+
meta itemprop="inLanguage" content=(lang);
295+
}
296+
});
296297

297298
let rest = render_lstr_alt(
298299
&r.common().title,
@@ -422,7 +423,7 @@ fn render_lstr_just_span(
422423
item_prop: Option<&'static str>,
423424
) -> Markup {
424425
html! {
425-
span class=[class] itemprop=[item_prop] lang=[lstr.lang.as_deref()] {
426+
span class=[class] itemprop=[item_prop] lang=[lstr.lang.as_ref()] {
426427
(maud::PreEscaped(&lstr.value))
427428
}
428429
}
@@ -469,7 +470,7 @@ fn render_lstr_just_cite(
469470
item_prop: Option<&'static str>,
470471
) -> Markup {
471472
html! {
472-
cite class=[class] itemprop=[item_prop] lang=[lstr.lang.as_deref()] {
473+
cite class=[class] itemprop=[item_prop] lang=[lstr.lang.as_ref()] {
473474
(maud::PreEscaped(&lstr.value))
474475
}
475476
}

build/src/bibliography.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::{collections::BTreeMap, convert::Infallible, str::FromStr};
22

3-
use isolang::Language;
4-
use langtag::LangTagBuf;
3+
use icu::locid::LanguageIdentifier;
54
use num_format::ToFormattedString;
65
use serde::{
76
de::{self, Visitor},
@@ -137,7 +136,7 @@ pub struct Common {
137136
#[serde(rename = "archive-URL")]
138137
pub archive_url: Option<String>,
139138

140-
pub language: Option<Language>,
139+
pub language: Option<LanguageIdentifier>,
141140

142141
pub notes: Option<String>,
143142
pub warnings: Option<String>,
@@ -508,7 +507,7 @@ impl Date {
508507
#[derive(Debug, Clone)]
509508
pub struct LString {
510509
pub value: String,
511-
pub lang: Option<LangTagBuf>,
510+
pub lang: Option<LanguageIdentifier>,
512511
pub alt: Option<String>,
513512
}
514513

@@ -557,7 +556,7 @@ impl<'de> Deserialize<'de> for LString {
557556
where
558557
A: de::MapAccess<'de>,
559558
{
560-
let mut value = None;
559+
let mut value: Option<String> = None;
561560
let mut lang = None;
562561
let mut alt = None;
563562

@@ -575,7 +574,7 @@ impl<'de> Deserialize<'de> for LString {
575574
return Err(de::Error::duplicate_field("lang"));
576575
}
577576

578-
lang = match LangTagBuf::new(v) {
577+
lang = match LanguageIdentifier::try_from_bytes(v.as_bytes()) {
579578
Ok(l) => Some(l),
580579
Err(e) => return Err(de::Error::custom(e)),
581580
};

build/src/intl.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
use std::{
2+
collections::BTreeMap,
3+
sync::{LazyLock, Mutex},
4+
};
5+
6+
use icu::{
7+
experimental::displaynames::LanguageDisplayNames,
8+
locid::{
9+
locale,
10+
subtags::{language, Language},
11+
Locale,
12+
},
13+
};
14+
15+
pub struct Intl {
16+
english_display_names: LanguageDisplayNames,
17+
other_display_names: Mutex<BTreeMap<Language, Option<String>>>,
18+
}
19+
20+
impl Intl {
21+
pub fn new() -> Self {
22+
let locale = locale!("en-GB").into();
23+
let options = Default::default();
24+
let english_display_names = LanguageDisplayNames::try_new(&locale, options).unwrap();
25+
26+
let mut other_display_names = BTreeMap::new();
27+
// backfill missing values
28+
other_display_names.insert(language!("cmn"), Some("官话".into()));
29+
other_display_names.insert(language!("ewe"), Some("Eʋegbe".into()));
30+
other_display_names.insert(language!("gez"), Some("ግዕዝ".into()));
31+
other_display_names.insert(language!("kxd"), Some("Bahasa Melayu Brunei".into()));
32+
other_display_names.insert(language!("mak"), Some("ᨅᨔ ᨆᨀᨔᨑ".into()));
33+
other_display_names.insert(language!("mcm"), Some("Papia Kristang".into()));
34+
other_display_names.insert(language!("nan"), Some("閩南語".into()));
35+
other_display_names.insert(language!("rng"), Some("XiRonga".into()));
36+
other_display_names.insert(language!("tws"), Some("潮州話".into()));
37+
other_display_names.insert(language!("wuu"), Some("吳語".into()));
38+
39+
Self {
40+
english_display_names,
41+
other_display_names: Mutex::new(other_display_names),
42+
}
43+
}
44+
45+
pub fn english_name(&self, language: Language) -> Option<&str> {
46+
self.english_display_names.of(language).or_else(|| {
47+
match language.as_str() {
48+
"cmn" => "Mandarin Chinese",
49+
"kew" => "Kewa",
50+
"kxd" => "Brunei Malay",
51+
"mbw" => "Maring",
52+
"mcm" => "Kristang",
53+
"mnk" => "Mandinka",
54+
"mnr" => "Mono",
55+
"rng" => "Ronga",
56+
"stv" => "Siltʼe",
57+
"tws" => "Teochew",
58+
"urh" => "Urhobo",
59+
_ => return None,
60+
}
61+
.into()
62+
})
63+
}
64+
65+
pub fn autonym(&self, language: Language) -> Option<String> {
66+
self.other_display_names
67+
.lock()
68+
.unwrap()
69+
.entry(language)
70+
.or_insert_with_key(|key| {
71+
let locale = Locale::from(*key).into();
72+
let options = Default::default();
73+
LanguageDisplayNames::try_new(&locale, options)
74+
.ok()
75+
.and_then(|display_names| display_names.of(*key).map(str::to_string))
76+
})
77+
.clone()
78+
}
79+
}
80+
81+
/// Process-wide [`Intl`] instance, built by `Intl::new` on first access.
pub static INTL: LazyLock<Intl> = LazyLock::new(Intl::new);

build/src/main.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ use std::{
1515
use bib_render::RenderedBibliography;
1616
use clap::Parser;
1717
use eyre::{bail, eyre, Context, ContextCompat, OptionExt, Result};
18+
use icu::locid::LanguageIdentifier;
1819
use itertools::Itertools;
19-
use langtag::LangTagBuf;
2020
use markdown::{mdast, Constructs, ParseOptions};
2121
use maud::Markup;
2222
use notify::{
@@ -36,6 +36,7 @@ use walkdir::WalkDir;
3636
mod bib_render;
3737
mod bib_to_csl;
3838
mod bibliography;
39+
mod intl;
3940
mod mdast_to_html;
4041
mod nvec;
4142
mod templates;
@@ -338,7 +339,7 @@ impl ImageManifestEntry {
338339
}
339340

340341
pub struct Aka {
341-
pub language: LangTagBuf,
342+
pub lang_id: LanguageIdentifier,
342343
pub word: Markup,
343344
pub url_path: String,
344345
}
@@ -671,7 +672,7 @@ impl Builder {
671672
article: &File<T>,
672673
url_lookup: &BTreeMap<String, Option<&str>>,
673674
article_tree: Option<&ArticleNode>,
674-
aka_handler: impl Fn(LangTagBuf, Markup),
675+
aka_handler: impl Fn(LanguageIdentifier, Markup),
675676
) -> Result<OutputFile>
676677
where
677678
File<T>: ArticleMetadata + BaseMetadata,
@@ -758,10 +759,10 @@ impl Builder {
758759
.par_iter()
759760
.filter(|a| !a.is_draft() || self.output_drafts)
760761
.map(|article| {
761-
let push_aka = |language: LangTagBuf, word: Markup| {
762+
let push_aka = |language: LanguageIdentifier, word: Markup| {
762763
if self.output_drafts || !article.is_draft() {
763764
let aka = Aka {
764-
language,
765+
lang_id: language,
765766
word,
766767
url_path: article.url_path.to_string(),
767768
};
@@ -777,10 +778,10 @@ impl Builder {
777778
.par_iter()
778779
.filter(|a| !a.is_draft() || self.output_drafts)
779780
.map(|game| {
780-
let push_aka = |language: LangTagBuf, word: Markup| {
781+
let push_aka = |language: LanguageIdentifier, word: Markup| {
781782
if self.output_drafts || !game.is_draft() {
782783
let aka = Aka {
783-
language,
784+
lang_id: language,
784785
word,
785786
url_path: game.url_path.to_string(),
786787
};

build/src/mdast_to_html.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use std::{borrow::Cow, collections::BTreeMap, path::Path, sync::LazyLock};
22

33
use eyre::{bail, eyre, Context, OptionExt, Result};
4+
use icu::locid::{langid, LanguageIdentifier};
45
use indexmap::IndexMap;
56
use itertools::Itertools;
6-
use langtag::LangTagBuf;
77
use markdown::mdast::{
88
AttributeContent, AttributeValue, Blockquote, MdxJsxFlowElement, MdxJsxTextElement, Node, Text,
99
Yaml,
@@ -57,7 +57,7 @@ pub fn to_html(
5757
bibliography: &RenderedBibliography,
5858
images: &ImageManifest,
5959
url_lookup: &BTreeMap<String, Option<&str>>,
60-
aka_handler: impl Fn(LangTagBuf, Markup),
60+
aka_handler: impl Fn(LanguageIdentifier, Markup),
6161
) -> Result<Markup> {
6262
let (fndefs, linkdefs) = locate_defs(node);
6363
Converter {
@@ -87,7 +87,7 @@ struct Converter<'a> {
8787
cite_count: usize,
8888
header_stack: Vec<usize>,
8989
url_lookup: &'a BTreeMap<String, Option<&'a str>>,
90-
aka_handler: &'a dyn Fn(LangTagBuf, Markup),
90+
aka_handler: &'a dyn Fn(LanguageIdentifier, Markup),
9191
}
9292

9393
fn index_to_string(mut index: u32) -> String {
@@ -706,10 +706,10 @@ impl Converter<'_> {
706706
.any(|(name, value)| *name == "class" && value.contains("aka"))
707707
{
708708
let lang_attr = find_attribute(&text.attributes, "lang")
709-
.map(|n| LangTagBuf::new(n.to_string()))
709+
.map(|n| LanguageIdentifier::try_from_bytes(n.as_bytes()))
710710
.transpose()
711-
.wrap_err("invalid lang tag")?
712-
.unwrap_or_else(|| LangTagBuf::new("en".to_string()).unwrap());
711+
.map_err(|e| eyre!("invalid lang tag {e}"))?
712+
.unwrap_or_else(|| langid!("en"));
713713

714714
let markup = self.expand(&text.children)?;
715715
(self.aka_handler)(lang_attr, markup);

0 commit comments

Comments
 (0)