1use crate::unicode_gen::*;
7use std::cmp::Ordering;
8use types::MappingOffset;
9
10pub fn ccc(ch: char) -> u8 {
14 let plane_offset = CCC_PLANE_OFFSETS[ch as u32 as usize >> 16];
16 let plane_end = CCC_PLANE_OFFSETS[((ch as u32 >> 16) + 1) as usize];
17 let plane = &CCC_PLANE_DATA[plane_offset..plane_end];
18 let ch_lsb = ch as u32 as u16;
19 let ix = plane
20 .binary_search_by(|entry| match entry.low_plane_end_ix.cmp(&ch_lsb) {
21 Ordering::Equal => Ordering::Less,
22 x => x,
23 })
24 .unwrap_or_else(|x| x);
25 if ix < plane.len() {
26 let entry = &plane[ix];
27 if entry.range().contains(&ch_lsb) {
28 return entry.ccc;
29 }
30 }
31 0
32}
33
34fn string_table_lookup(
36 ch: char,
37 plane_offsets: &[usize],
38 plane_data: &[MappingOffset],
39 string_data: &'static [u8],
40) -> Option<&'static str> {
41 let plane_start = plane_offsets[(ch as u32 >> 16) as usize];
43 let plane_end = plane_offsets[((ch as u32 >> 16) + 1) as usize];
44 let plane = &plane_data[plane_start..plane_end];
45 let ch_lsb = ch as u32 as u16;
47 if let Ok(ix) = plane.binary_search_by(|entry| entry.low_plane_ix.cmp(&ch_lsb)) {
48 let offset = plane[ix].offset as usize;
50 let end = if plane.len() == ix + 1 {
51 if plane_end < plane_data.len() {
52 plane_data[plane_end].offset as usize
53 } else {
54 string_data.len()
55 }
56 } else {
57 plane[ix + 1].offset as usize
58 };
59 let out = &string_data[offset..end];
60 return Some(unsafe {
61 std::str::from_utf8_unchecked(out)
63 });
64 }
65 return None;
66}
67
68pub fn decomposition(ch: char) -> Option<&'static str> {
70 string_table_lookup(ch, DECOMP_PLANE_OFFSETS, DECOMP_PLANE_DATA, DECOMP_STRINGS)
71}
72
73pub fn default_ignorable(ch: char) -> bool {
75 let plane_offset = IGNORABLE_PLANE_OFFSETS[ch as u32 as usize >> 16];
77 let plane_end = IGNORABLE_PLANE_OFFSETS[((ch as u32 >> 16) + 1) as usize];
78 let plane = &IGNORABLE_PLANE_DATA[plane_offset..plane_end];
79 let ch_lsb = ch as u32 as u16;
80 let ix = plane
81 .binary_search_by(|entry| match entry.end.cmp(&ch_lsb) {
82 Ordering::Equal => Ordering::Less,
83 x => x,
84 })
85 .unwrap_or_else(|x| x);
86 if ix < plane.len() {
87 plane[ix].contains(&ch_lsb)
88 } else {
89 false
90 }
91}
92
93pub fn casefold(ch: char) -> Option<&'static str> {
94 string_table_lookup(ch, CASEFOLD_PLANE_OFFSETS, CASEFOLD_PLANE_DATA, CASEFOLD_STRINGS)
95}
96
97#[cfg(test)]
98mod test {
99 use super::*;
100
101 #[test]
102 fn test_ccc() {
103 assert_eq!(ccc('\u{0}'), 0);
104 assert_eq!(ccc('A'), 0);
105 assert_eq!(ccc('\u{059a}'), 222);
106 assert_eq!(ccc('\u{2cee}'), 0);
107 assert_eq!(ccc('\u{2cef}'), 230);
108 assert_eq!(ccc('\u{2cf0}'), 230);
109 assert_eq!(ccc('\u{2cf1}'), 230);
110 assert_eq!(ccc('\u{2cf2}'), 0);
111 }
112
113 #[test]
114 fn test_decomposition() {
115 assert_eq!(decomposition('\u{f900}'), Some("\u{8c48}"));
116 assert_eq!(decomposition('A'), None);
117 assert_eq!(decomposition('\u{1f82}'), Some("α\u{313}\u{300}\u{345}"));
118 }
119
120 #[test]
121 fn test_deafault_ignorable() {
122 assert!(!default_ignorable('\u{00ac}'));
123 assert!(default_ignorable('\u{00ad}'));
124 assert!(!default_ignorable('\u{00ae}'));
125 assert!(!default_ignorable('\u{115e}'));
126 assert!(default_ignorable('\u{115f}'));
127 assert!(default_ignorable('\u{1160}'));
128 assert!(!default_ignorable('\u{1161}'));
129 }
130
131 #[test]
132 fn test_casefold() {
133 assert_eq!(casefold('A'), Some("a"));
134 assert_eq!(casefold('a'), None);
135 }
136
137 #[test]
138 fn test_last_char() {
139 assert_eq!(decomposition('\u{10000}'), None);
143 assert_eq!(decomposition('\u{1fbca}'), None);
144 assert_eq!(decomposition('\u{1fbf9}'), None);
145 assert_eq!(decomposition('\u{20000}'), None);
146 assert_eq!(decomposition('\u{2fa1d}'), Some("\u{2a600}"));
147 assert_eq!(casefold('\u{ff39}'), Some("\u{ff59}"));
148 assert_eq!(casefold('\u{ff3a}'), Some("\u{ff5a}"));
149 assert_eq!(casefold('\u{ff3b}'), None);
150 assert_eq!(casefold('\u{10400}'), Some("\u{10428}"));
151 assert_eq!(casefold('\u{1e920}'), Some("\u{1e942}"));
152 assert_eq!(casefold('\u{1e921}'), Some("\u{1e943}"));
153 assert_eq!(casefold('\u{1e922}'), None);
154 }
155
156 #[test]
157 fn test_high_char() {
158 assert_eq!(decomposition('\u{10FFFF}'), None);
160 assert_eq!(casefold('\u{10FFFF}'), None);
161 assert_eq!(default_ignorable('\u{10FFFF}'), false);
162 }
163}