This commit is contained in:
parent
b410540f7f
commit
4c88eb4121
9 changed files with 542 additions and 0 deletions
11
timezone/__init__.py
Normal file
11
timezone/__init__.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
""" Timezone management
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- `search.py`: Fast prefix search for timezones
|
||||||
|
- `timezone.py`: Conversion functions
|
||||||
|
- `api.py`: A REST API for the functionality provided by this package
|
||||||
|
"""
|
||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
# Blueprint under which the package's routes are registered (api.py decorates
# its view functions with @app.route).
app = Blueprint('timezone', __name__, template_folder='templates')
|
||||||
|
# Imported after `app` exists on purpose: api.py does `from . import app`, so
# importing it any earlier would hit a partially initialised package.
from . import api
|
15
timezone/api.py
Normal file
15
timezone/api.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from flask import request, jsonify
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from . import app
|
||||||
|
|
||||||
|
@app.route('/api/v1/autocomplete', methods=['GET'])
def autocomplete_timezone():
    """Echo the ``complete`` query parameter of the request.

    Returns:
        The text ``"No part"`` when the ``complete`` parameter is missing or
        empty; otherwise the parameter value itself, served as plain text.
    """
    complete_str = request.args.get('complete')
    if not complete_str:
        return "No part"

    # The value comes straight from the client. A bare str would be served as
    # text/html, letting a crafted URL inject markup into the response, so the
    # content type is pinned to plain text.
    return complete_str, 200, {'Content-Type': 'text/plain; charset=utf-8'}
|
||||||
|
|
1
timezone/data/search.py
Normal file
1
timezone/data/search.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
def suggest(prefix):
    """Suggest entries for ``prefix`` (not implemented yet).

    The function previously had no body, which made the whole module fail to
    import with a SyntaxError.

    Raises:
        NotImplementedError: always, until the search is implemented.
    """
    raise NotImplementedError("suggest() is not implemented yet")
|
147
timezone/data/util/patricia_trie.py
Normal file
147
timezone/data/util/patricia_trie.py
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
class PatriciaTrie:
    """Compressed prefix tree (Patricia trie) over strings.

    Each node holds a fragment of a key in ``elem``. Concatenating the
    fragments on the path from the root down to a node spells the start of a
    key; nodes flagged ``leaf`` mark the end of a key passed to :meth:`add`.
    """

    def __init__(self):
        # Give the root its own list rather than relying on Node's default
        # ``children`` argument.
        self.root = Node(children=[])

    def find(self, prefix, node=None, collector=""):
        """Return every added key that starts with ``prefix``.

        Args:
            prefix: The part of the searched prefix not yet matched above
                ``node``.
            node: Node to continue the search at; ``None`` starts at the root.
            collector: Key fragments matched on the way down to ``node``.

        Returns:
            A list of the matching keys, empty when nothing matches.
        """
        if node is None:
            return self.find(prefix, self.root)

        logging.debug(f"Looking for prefix {prefix} at {node.elem}")
        if not prefix:
            return self._collect(node, collector)

        for child in node.children:
            if prefix.startswith(child.elem):
                # The child's fragment is consumed completely; keep descending.
                return self.find(prefix[len(child.elem):], child,
                                 collector + child.elem)
            if child.elem.startswith(prefix):
                # The prefix ends in the middle of this child's fragment, so
                # every key below the child matches.
                return self._collect(child, collector + child.elem)

        return []

    def _collect(self, node, collector):
        """Return the keys of all leaf nodes at or below ``node``."""
        res = []
        if node.leaf:
            logging.debug(f"Found leaf {node.elem}")
            res.append(collector)
        for child in node.children:
            logging.debug(f"Looking for leafs in {node.elem}")
            res.extend(self._collect(child, collector + child.elem))
        logging.debug(f"Result for {node.elem}: {res}")
        return res

    def add(self, elem):
        """Insert the key ``elem`` into the trie.

        Adding a key that is already present only (re-)marks its node as leaf.
        """
        (node, split_idx, elem_rest) = self.find_longest_match(elem, self.root)

        if not split_idx:
            if not elem_rest:
                # elem already spelled out by an existing node: just make sure
                # that node is marked as leaf.
                node.leaf = True
            else:
                # Nothing below ``node`` shares a first character with the
                # remainder, so it becomes a new child.
                node.children.append(
                    Node(elem=elem_rest, parent=node, leaf=True, children=[]))
            return

        # elem diverges from (or ends inside) node.elem: split the node at
        # split_idx. The split-off tail keeps the old leaf flag and children.
        tail = Node(elem=node.elem[split_idx:], parent=node, leaf=node.leaf,
                    children=node.children)
        for grandchild in tail.children:
            grandchild.parent = tail
        node.elem = node.elem[:split_idx]
        node.children = [tail]

        if elem_rest:
            # The shortened node is only a branching point; elem ends in a new
            # sibling of the tail.
            node.leaf = False
            node.children.append(
                Node(elem=elem_rest, parent=node, leaf=True, children=[]))
        else:
            # elem ends exactly at the split point.
            node.leaf = True

    def find_longest_match(self, elem, node):
        """Locate where ``elem`` has to be attached below ``node``.

        Returns:
            A tuple ``(node, split_idx, rest)``:

            * ``(child, None, None)`` - a node spelling ``elem`` exists.
            * ``(child, len(elem), None)`` - ``elem`` ends inside ``child.elem``.
            * ``(child, i, elem[i:])`` - ``elem`` and ``child.elem`` diverge at
              index ``i``.
            * ``(node, None, elem)`` - no child matches; ``elem`` (the part not
              consumed on the way down) belongs directly below ``node``.
        """
        for child in node.children:
            if not child.elem or not elem:
                continue

            # Compare by value; identity comparison of characters only works
            # by accident of string caching.
            if child.elem[0] != elem[0]:
                continue

            # child matches completely
            if elem.startswith(child.elem):
                if len(elem) == len(child.elem):
                    return (child, None, None)
                return self.find_longest_match(elem[len(child.elem):], child)

            # elem is shorter than child.elem: split child at len(elem)
            if child.elem.startswith(elem):
                return (child, len(elem), None)

            # Only a proper prefix is shared: find the first differing index.
            for i, (a, b) in enumerate(zip(elem, child.elem)):
                if a != b:
                    return (child, i, elem[i:])

        # No child(-prefix) matched, create another child
        return (node, None, elem)

    def to_dot(self):
        """Print the trie as a Graphviz ``graph`` to stdout."""
        print("graph {")
        self._to_dot(self.root)
        print("}")

    def _to_dot(self, node):
        """Print the edges (and leaf markers) of the subtree below ``node``."""
        for child in node.children:
            if not node.elem:
                print(f'root -- "{child.elem}";')
            else:
                print(f'"{node.elem}" -- "{child.elem}";')

            if child.leaf:
                print(f'"{child.elem}" [color=blue];')

            self._to_dot(child)
|
||||||
|
|
||||||
|
class Node:
    """A node of the PatriciaTrie.

    Args:
        elem: Key fragment stored in this node.
        parent: Node this node hangs below.
        children: List of child nodes; a new empty list when omitted.
        leaf: Whether a key ends at this node.
        offset: Payload value, stored as given.
        title: Payload value; falls back to ``elem`` when falsy.
        info: Payload value, stored as given.
    """

    def __init__(self, elem=None, parent=None, children=None,
                 leaf=False, offset=0, title=None, info=None):
        self.elem = elem
        self.parent = parent
        # A literal ``[]`` default would be one list shared by every node
        # created without explicit children.
        self.children = [] if children is None else children
        self.leaf = leaf

        # payload
        self.offset = offset
        self.title = title if title else elem
        self.info = info
|
32
timezone/data/util/preprocessor.py
Normal file
32
timezone/data/util/preprocessor.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# Create Patricia Tries from various datasets
|
||||||
|
#
|
||||||
|
# Each Trie leaf has a timezone assigned which
|
||||||
|
# may be a fixed UTC offset or a tz timezone
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from patricia_trie import PatriciaTrie
|
||||||
|
|
||||||
|
# Geonames from
|
||||||
|
# http://download.geonames.org/export/dump/allCountries.zip
|
||||||
|
def geonames_allcountries(path):
    """Build a PatriciaTrie from the rows of a geonames dump whose column 6 is "P".

    Args:
        path: Path to the tab-separated dump file.

    Returns:
        A PatriciaTrie containing column 2 of every selected row with all
        spaces removed.
    """
    patricia = PatriciaTrie()

    # NOTE(review): geonames documents its dumps as UTF-8, tab-delimited and
    # unquoted - confirm against the readme. Quote handling is switched off so
    # a name containing '"' cannot swallow the following columns, and the
    # encoding no longer depends on the locale.
    with open(path, encoding="utf-8", newline="") as all_countries_csv:
        reader = csv.reader(all_countries_csv, delimiter='\t',
                            quoting=csv.QUOTE_NONE)
        for row in reader:
            # Blank or truncated lines have no column 6; skip them instead of
            # failing with an IndexError.
            # presumably "P" is the populated-place feature class - verify.
            if len(row) < 7 or row[6] != "P":
                continue

            patricia.add(row[2].replace(' ', ''))

    return patricia
|
||||||
|
|
||||||
|
# Timezone abbreviations from
|
||||||
|
# https://www.timeanddate.com/time/zones/
|
||||||
|
def timezone_abbreviations():
    """Stub for the timezone-abbreviation dataset; currently returns None."""
    return None
|
||||||
|
|
||||||
|
# Timezones from
|
||||||
|
# https://www.iana.org/time-zones
|
||||||
|
def tz_zones():
    """Stub for the tz timezone dataset; currently returns None."""
    return None
|
7
timezone/data/util/test.py
Normal file
7
timezone/data/util/test.py
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
import contextlib
import sys

import preprocessor

p = preprocessor.geonames_allcountries("/home/armin/Downloads/allCountries/allCountries10000.txt")

# to_dot() prints to stdout. redirect_stdout puts the real sys.stdout back when
# the block ends; binding the file to sys.stdout via `as` left stdout pointing
# at a closed file for the rest of the process.
with open("/home/armin/Desktop/test.dot", "w", encoding="utf-8") as dot_file, \
        contextlib.redirect_stdout(dot_file):
    p.to_dot()
|
0
timezone/search/__init__.py
Normal file
0
timezone/search/__init__.py
Normal file
300
timezone/search/trie.py
Normal file
300
timezone/search/trie.py
Normal file
|
@ -0,0 +1,300 @@
|
||||||
|
"""Radix Trie with radix 256
|
||||||
|
|
||||||
|
A Radix Trie[1] - once built - allows efficient prefix search. The trie works
|
||||||
|
on byte strings and hence is oblivious to encoding. The encoding for creation
|
||||||
|
and search must match. Payload of each node can be an arbitrary object.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
.. code :: python
|
||||||
|
t = Trie()
|
||||||
|
t.insert(b"Hello", "P1")
|
||||||
|
t.insert(b"Hi", "P2")
|
||||||
|
t.insert(b"Hela", "P3")
|
||||||
|
t.find(b"He") # (Terminal, label) pairs for the terminals holding "P1" and "P3"
|
||||||
|
|
||||||
|
[1] https://en.wikipedia.org/wiki/Radix_tree
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import Sequence, MutableSequence, ByteString, Any, Optional
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
import logging
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class Trie:
    """Radix trie that maps byte-string labels to arbitrary content.

    Args:
        multi_value: Passed on to every Terminal created by this trie. When
            true, content inserted under an already present label is kept
            alongside the existing content instead of overwriting it.
    """

    def __init__(self, multi_value=False):
        self.root = Root([])
        self.multi_value = multi_value

    def insert(self, label: ByteString, content: Any):
        """Insert ``content`` under ``label``.

        Args:
            label: Non-empty byte string identifying the content.
            content: Payload stored in the Terminal for ``label``.

        Returns:
            The Terminal node that now holds ``content``.

        Raises:
            ValueError: If ``label`` is empty. An empty label cannot be
                compared by first byte and would break later insertions.
        """
        if not label:
            raise ValueError("label must not be empty")

        log.info(f"Inserting {label} into Trie")
        start = self.root.child_by_common_prefix(label)
        if start is None:
            log.debug(f"Creating new terminal for {label} at root")
            new_node = Terminal(label, content, self.root, [], self.multi_value)
            self.root.put_child(new_node)
            return new_node
        log.debug(f"Found match {start} for {label}. Traversing down")
        # Hand the created Terminal back on this path as well.
        return self._insert(start, label, content)

    def _insert(self, node, label, content):
        """Insert ``label`` at ``node``, which shares at least its first byte.

        Returns:
            The Terminal node that now holds ``content``.
        """
        log.info(f"Inserting {label} into Trie at {node}")
        if node.has_label(label):
            log.debug(f"{node} equals {label}. Wrapping node as Terminal.")
            if isinstance(node, Terminal) and not self.multi_value:
                log.warning(f"{node} is already a Terminal. Content will be overwritten.")
            terminal = Terminal.from_child(node, content, self.multi_value)
            node.replace_with(terminal)
            return terminal

        if node.is_prefix_of(label):
            log.debug(f"{node} is prefix of {label}")
            cutoff = node.cut_from(label)
            next_node = node.child_by_common_prefix(cutoff)
            if next_node is None:
                log.debug(f"No matching child found for {cutoff}. Creating new child terminal.")
                terminal = Terminal(cutoff, content, node, [], self.multi_value)
                node.put_child(terminal)
                return terminal
            log.debug(f"Found match {next_node} for {cutoff}. Traversing down.")
            return self._insert(next_node, cutoff, content)

        if node.starts_with(label):
            # label is a proper prefix of node's label: the new Terminal takes
            # node's place and node, shortened by label, becomes its child.
            log.debug(f"{label} is part of {node}. Creating new parent from {label}")
            new_node = Terminal(label, content, node.parent, [], self.multi_value)
            node.replace_with(new_node)
            node.strip_prefix(label)
            new_node.put_child(node)
            return new_node

        # Neither is a prefix of the other: put both below a new inner node
        # that carries their shared prefix.
        log.debug(f"{label} and {node} have a common ancestor")
        shared = node.common_prefix(label)
        log.debug(f"Creating new ancestor for {shared}")
        ancestor = Child(shared, node.parent, [])
        node.replace_with(ancestor)
        terminal = Terminal(cut_off_prefix(shared, label), content, ancestor, [], self.multi_value)
        node.strip_prefix(shared)
        ancestor.put_child(terminal)
        ancestor.put_child(node)
        return terminal

    def find(self, prefix):
        """Find all terminals whose label starts with ``prefix``.

        Returns:
            A list of ``(Terminal, label)`` tuples where ``label`` is the full
            label of the terminal; empty if nothing matches.
        """
        node, overshoot = self._locate(self.root, prefix)
        if node is None:
            return []
        # The matched node's own label may reach past the end of the prefix;
        # include that part so the labels built below it come out complete.
        return self._get_terminals(node, bytes(prefix) + overshoot)

    def _find(self, node, prefix, collector=""):
        """Return the node below which all matches for ``prefix`` live, or None."""
        return self._locate(node, prefix)[0]

    def _locate(self, node, prefix):
        """Walk down from ``node`` along ``prefix``.

        Returns:
            ``(match, overshoot)``. ``match`` is the node below which all
            matches live (``None`` if there is none) and ``overshoot`` is the
            part of the match's label that extends beyond ``prefix``.
        """
        cutoff = node.cut_from(prefix)
        log.debug(f"Searching for {cutoff} in {node}")
        child = node.child_by_prefix_match(cutoff)
        if child is not None:
            log.debug(f"Found node {child} in {node} for {cutoff}. Traversing down.")
            return self._locate(child, cutoff)

        if not cutoff:
            # The prefix ends exactly at this node.
            return node, b""

        log.debug(f"Leftover cutoff {cutoff}. Trying to find node with prefix {cutoff}")
        child = node.child_by_common_prefix(cutoff)
        if child is None or not child.starts_with(cutoff):
            return None, b""
        log.debug(f"Found child {child} starting with {cutoff}")
        return child, bytes(child.label[len(cutoff):])

    def _get_terminals(self, node, label_builder):
        """Collect ``(Terminal, label)`` tuples for ``node`` and its subtree.

        ``label_builder`` is the full label of ``node``; it is extended by each
        child's label on the way down.
        """
        if not node:
            return []

        collector = []
        if isinstance(node, Terminal):
            collector.append((node, label_builder))
        for child in node.children:
            collector.extend(self._get_terminals(child, child.extend(label_builder)))
        return collector

    def to_dot(self) -> str:
        """Return the trie as the text of a Graphviz ``graph``."""
        return "graph {\n\n"+self.root.to_dot()+"\n}"
|
||||||
|
|
||||||
|
def has_common_prefix(label: ByteString, other_label: ByteString) -> bool:
    """Tell whether both labels begin with the same element.

    Both labels must be non-empty.
    """
    assert label and other_label
    first, other_first = label[0], other_label[0]
    return first == other_first
|
||||||
|
|
||||||
|
def common_prefix(label: ByteString, other_label: ByteString) -> ByteString:
    """Return the longest prefix shared by ``label`` and ``other_label``.

    The result is an immutable ``bytes`` object (empty if the labels differ in
    their first byte). The value is used as a node label, and node labels are
    hashed, which a mutable bytearray does not support.
    """
    length = 0
    for (a, b) in zip(label, other_label):
        if a != b:
            break
        length += 1
    return bytes(label[:length])
|
||||||
|
|
||||||
|
def is_prefix_of(prefix: ByteString, label: ByteString) -> bool:
    """Tell whether ``label`` begins with ``prefix``.

    An empty ``prefix`` is a prefix of every label.
    """
    fits = len(prefix) <= len(label)
    return fits and all(a == b for (a, b) in zip(prefix, label))
|
||||||
|
|
||||||
|
def find_first(predicate, iterable):
    """Give back the first item of ``iterable`` accepted by ``predicate``.

    Returns None when no item is accepted.
    """
    matches = filter(predicate, iterable)
    return next(matches, None)
|
||||||
|
|
||||||
|
def cut_off_prefix(prefix: ByteString, label: ByteString) -> ByteString:
    """Strip ``prefix`` from the front of ``label``.

    ``prefix`` has to be a prefix of ``label``. The remainder is returned as
    ``bytes``.
    """
    assert is_prefix_of(prefix, label)
    remainder = label[len(prefix):]
    return bytes(remainder)
|
||||||
|
|
||||||
|
class Node(ABC):
    """Base class of all trie nodes: child management and dot export."""

    def __init__(self, children: MutableSequence[Child]):
        self.children = children

    def child_by_common_prefix(self, label: ByteString) -> Optional[Child]:
        """ Return Child that has a common prefix with label if one exists. """
        def by_common_prefix(child: Child):
            return has_common_prefix(child.label, label)
        return find_first(by_common_prefix, self.children)

    def child_by_prefix_match(self, label: ByteString) -> Optional[Child]:
        """ Return Child whose label is a prefix of the given label if one exists. """
        def by_prefix_match(child: Child):
            return is_prefix_of(child.label, label)
        return find_first(by_prefix_match, self.children)

    def put_child(self, child: Child):
        """ Put child into this node's children, replacing an equal existing child. """
        if child in self.children:
            log.warning(f"Replacing child {child.label}")
            self.remove_child(child)
        child.parent = self
        self.children.append(child)

    def replace_child(self, child: Child, replacement: Child):
        """ Remove child from this node's children and add replacement. """
        self.remove_child(child)
        self.put_child(replacement)

    def remove_child(self, child: Child):
        """ Remove child from this node's children.

        A child that is not present is reported with a warning and otherwise
        ignored.
        """
        if child not in self.children:
            log.warning(f"Trying to delete {child.label} but it does not exist.")
            # Nothing to remove; list.remove() would raise ValueError here.
            return
        self.children.remove(child)

    @abstractmethod
    def dot_label(self) -> str:
        """ Readable label for this node in a dot graph """
        ...

    @abstractmethod
    def dot_id(self) -> str:
        """ Technical id for this node in a dot graph. Must be unique. """
        ...

    @abstractmethod
    def cut_from(self, label: ByteString) -> ByteString:
        """ Cut off node's label considered as prefix from label. """
        ...

    def to_dot(self) -> str:
        """ Return the dot statements for this node and its whole subtree. """
        parts = [f'{self.dot_id()} [label="{self.dot_label()}"]\n']
        for child in self.children:
            parts.append(f"{self.dot_id()} -- {child.dot_id()}\n")
            parts.append(child.to_dot())
        return "".join(parts)
|
||||||
|
|
||||||
|
class Root(Node):
    """Label-less top node of the trie."""

    def dot_id(self):
        """ Id of the root in a dot graph. """
        return "root"

    def dot_label(self):
        """ Label of the root in a dot graph. """
        return "root"

    def cut_from(self, label: ByteString) -> ByteString:
        """ The root has no label of its own, so label is returned unchanged. """
        return label
|
||||||
|
|
||||||
|
class Child(Node):
    """Inner trie node that carries a label fragment but no content.

    Two children compare equal when their labels are equal.
    """

    def __init__(self, label: ByteString, parent: Node, children: MutableSequence[Child]):
        super().__init__(children)
        self.label = label
        self.parent = parent

    def __eq__(self, other_child):
        return (isinstance(other_child, Child)
                and self.label == other_child.label)

    def __hash__(self):
        # Labels may be bytearrays, which are unhashable; equal labels must
        # also hash equally whichever byte-string type holds them.
        return hash(bytes(self.label))

    def __str__(self):
        return self.label.decode('utf-8', 'replace').replace('"', '\\"')

    def dot_label(self):
        """ Label decoded as UTF-8 with double quotes escaped. """
        return str(self)

    def dot_id(self):
        """ The object's id(), used as the node id in the dot output. """
        return id(self)

    def has_label(self, label):
        """ Whether this node's label equals label. """
        return self.label == label

    def is_prefix_of(self, label):
        """ Whether this node's label is a prefix of label. """
        return is_prefix_of(self.label, label)

    def replace_with(self, new_child: Child):
        """ Put new_child in this node's place below this node's parent. """
        new_child.parent = self.parent
        self.parent.replace_child(self, new_child)

    def starts_with(self, label: ByteString) -> bool:
        """ Whether this node's label starts with label. """
        return is_prefix_of(label, self.label)

    def cut_from(self, label: ByteString) -> ByteString:
        """ Cut node's label from (start of) label """
        return cut_off_prefix(self.label, label)

    def strip_prefix(self, prefix: ByteString):
        """ Cut off prefix from node's label """
        self.label = cut_off_prefix(prefix, self.label)

    def extend(self, label: ByteString) -> ByteString:
        """ Extend label by node's label """
        return bytes(label) + bytes(self.label)

    def split_label_at(self, index):
        """ Return the node's label split into the parts before and from index. """
        return (self.label[:index], self.label[index:])

    def contains(self, label):
        """ Whether this node's label starts with label (same as starts_with). """
        return self.starts_with(label)

    def common_prefix(self, label):
        """ Longest prefix shared by this node's label and label. """
        return common_prefix(self.label, label)
|
||||||
|
|
||||||
|
class Terminal(Child):
    """Trie node at which an inserted label ends; carries the content.

    With ``multi_value`` the content is a list of all values stored for the
    label, otherwise it is the single value given.
    """

    def __init__(self, label: ByteString, content: Any, parent: Node, children: MutableSequence[Child], multi_value: bool):
        super().__init__(label, parent, children)
        self.multi_value = multi_value
        self.content = [content] if multi_value else content

    @classmethod
    def from_child(cls, child: Child, content: Any, multi_value: bool):
        """Create a Terminal with ``child``'s label, parent and children.

        Args:
            child: Node to take label, parent and children from.
            content: Content of the new Terminal.
            multi_value: Multi-value mode for the new Terminal. Ignored when
                ``child`` already is a Terminal; its mode is kept.

        Returns:
            A new Terminal. If ``child`` is a multi-value Terminal, its
            existing content is appended after ``content``.
        """
        if isinstance(child, Terminal):
            # from_child cannot change the multi-value state of a child that
            # already is a Terminal, so the child's own flag wins in both
            # modes.
            t = cls(child.label, content, child.parent, child.children, child.multi_value)
            if child.multi_value:
                t.content.extend(child.content)  # add back original content
            return t
        return cls(child.label, content, child.parent, child.children, multi_value)

    def to_dot(self) -> str:
        """ Dot statements of the subtree, with this node coloured blue. """
        s = super().to_dot()
        s += f"{self.dot_id()} [color=blue]\n"
        return s
|
||||||
|
|
29
timezone/timezone.py
Normal file
29
timezone/timezone.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
import csv
|
||||||
|
from search.trie import Trie
|
||||||
|
|
||||||
|
def load_geonames(path="data/cities500.txt"):
    """Load a tab-separated geonames file into a multi-value Trie.

    Column 1 of each row, encoded as UTF-8, is the label and column 17 the
    content.
    # NOTE(review): presumably name and timezone id - confirm against the
    # geonames readme.

    Args:
        path: File to read; defaults to the previously hard-coded location.

    Returns:
        The populated Trie.

    Raises:
        Exception: Whatever a row raises, re-raised after the offending row
            has been printed.
    """
    t = Trie(multi_value=True)
    # Read as UTF-8 independent of the locale, and disable quote handling so a
    # name containing '"' cannot swallow the following columns.
    with open(path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for i, row in enumerate(reader):
            try:
                t.insert(row[1].encode("utf-8"), row[17])
            except Exception:
                print(f"Error in row {i}")
                print(f"Label: '{row[1]}'")
                print(f"Type: {type(row[1])}")
                raise
    return t
|
||||||
|
|
||||||
|
def check_geonames(path="data/cities500.txt"):
    """Print every row of the geonames file whose column 1 ends with "lea".

    Args:
        path: File to read; defaults to the previously hard-coded location.

    Raises:
        Exception: Whatever a row raises, re-raised after the offending row
            has been printed.
    """
    # Same reading settings as the loader: explicit UTF-8, no quote handling.
    with open(path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for i, row in enumerate(reader):
            try:
                if row[1].endswith("lea"):
                    print(f"{i}: {row[1]} \t\t\t {row[17]}")
            except Exception:
                print(f"Error in row {i}")
                print(f"Label: '{row[1]}'")
                print(f"Type: {type(row[1])}")
                raise
|
Loading…
Reference in a new issue