diff --git a/srcs/compose/compile.py b/srcs/compose/compile.py
index 1e60ae8..2cb060e 100644
--- a/srcs/compose/compile.py
+++ b/srcs/compose/compile.py
@@ -64,11 +64,6 @@ def parse_sequences_file_xkb(fname):
     def parse_seq_result(r):
         if len(r) == 2 and r[0] == '\\':
             return r[1]
-        # The state machine can't represent characters that do not fit in a
-        # 16-bit char. This breaks some sequences that output letters with
-        # combined diacritics or emojis.
-        if len(r) > 1 or ord(r[0]) > 65535:
-            raise Exception("Char out of range: " + r)
         return r
     # Populate [char_names] with the information present in the file.
     with open(fname, "r") as inp:
@@ -146,7 +141,15 @@ def make_automata(tree_root):
             states[i] = (c, node_i)
             i += 1
     def add_leaf(c):
-        states.append((c, 1))
+        # Leaves have two encodings: a character final state for 15-bit
+        # characters and a string final state (header -1) for the rest.
+        if len(c) > 1 or ord(c[0]) > 32767: # String final state
+            cb = c.encode("UTF-16-BE") # No BOM, two bytes per code unit
+            states.append((-1, len(cb) // 2 + 1))
+            for i in range(0, len(cb), 2): # One cell per UTF-16 code unit
+                states.append((chr((cb[i] << 8) | cb[i + 1]), 0))
+        else: # Character final state
+            states.append((c, 1))
     def add_node(n):
         if type(n) == str:
             add_leaf(n)
@@ -169,6 +172,7 @@ def gen_java(machine):
     chars_map = {
             # These characters cannot be used in unicode form as Java's parser
             # unescape unicode sequences before parsing.
+            -1: "\\uFFFF",
             "\"": "\\\"",
             "\\": "\\\\",
             "\n": "\\n",
diff --git a/srcs/juloo.keyboard2/ComposeKey.java b/srcs/juloo.keyboard2/ComposeKey.java
index ef5a7f2..328bb83 100644
--- a/srcs/juloo.keyboard2/ComposeKey.java
+++ b/srcs/juloo.keyboard2/ComposeKey.java
@@ -27,31 +27,48 @@ public final class ComposeKey
   }
 
   /** Apply the pending compose sequence to char [c]. */
-  static KeyValue apply(int state, char c)
+  static KeyValue apply(int prev, char c)
   {
     char[] states = ComposeKeyData.states;
     char[] edges = ComposeKeyData.edges;
-    int length = edges[state];
-    int next = Arrays.binarySearch(states, state + 1, state + length, c);
+    int prev_length = edges[prev];
+    int next = Arrays.binarySearch(states, prev + 1, prev + prev_length, c);
     if (next < 0)
       return null;
     next = edges[next];
-    // The next state is the end of a sequence, show the result.
-    if (edges[next] == 1)
-      return KeyValue.makeCharKey(states[next]);
-    return KeyValue.makeComposePending(String.valueOf(c), next, 0);
+    char next_header = states[next];
+    if (next_header == 0) // Enter a new intermediate state.
+      return KeyValue.makeComposePending(String.valueOf(c), next, 0);
+    else if (next_header != 0xFFFF) // Character final state.
+      return KeyValue.makeCharKey(next_header);
+    else // next_header is 0xFFFF ([char] is unsigned), string final state.
+    {
+      int next_length = edges[next]; // Cell count, header included.
+      return KeyValue.makeStringKey(
+          new String(states, next + 1, next_length - 1));
+    }
   }
 
-  /** The [states] array represents the different states and their transition.
-      A state occupies one or several cells of the array:
-      - The first cell is the result of the compose sequence if the state is of
-        size 1, unspecified otherwise.
-      - The remaining cells are the transitions, sorted alphabetically.
+  /** The state machine is comprised of two arrays.
+
+      The [states] array represents the different states and the associated
+      transitions:
+      - The first cell is the header cell, [states[s]].
+      - If the header is equal to [0],
+        The remaining cells are the transition characters, sorted
+        alphabetically.
+      - If the header is neither [0] nor [0xFFFF],
+        This is a final state, [states[s]] is the result of the sequence.
+        In this case, [edges[s]] must be equal to [1].
+      - If the header is equal to [0xFFFF] (written as -1 by the generator),
+        This is a final state, the remaining cells represent the result string
+        which starts at index [s + 1] and has a length of [edges[s] - 1].
 
       The [edges] array represents the transition state corresponding to each
       accepted inputs.
-      Id [states[i]] is the first cell of a state, [edges[i]] is the number of
-      cells occupied by the state [i].
-      If [states[i]] is a transition, [edges[i]] is the index of the state to
-      jump into. */
+      - If [states[s]] is a header cell, [edges[s]] is the number of cells
+        occupied by the state [s], including the header cell.
+      - If [states[s]] is a transition, [edges[s]] is the index of the state to
+        jump into.
+      - If [states[s]] is a part of a final state, [edges[s]] is not used. */
 }
diff --git a/srcs/juloo.keyboard2/ComposeKeyData.java b/srcs/juloo.keyboard2/ComposeKeyData.java
index 999b397..2fbe4d0 100644
Binary files a/srcs/juloo.keyboard2/ComposeKeyData.java and b/srcs/juloo.keyboard2/ComposeKeyData.java differ