83 lines
2 KiB
Python
83 lines
2 KiB
Python
|
import csv
|
||
|
from bytetrie import ByteTrie
|
||
|
|
||
|
def load_geonames(path="cities500.txt"):
    """Build a multi-value ByteTrie from a tab-separated GeoNames dump.

    For every row, column 1 is UTF-8 encoded and used as the key and
    column 17 is stored as the value.
    NOTE(review): in the GeoNames "geoname" table layout these columns are
    presumably the place name and the timezone -- confirm against the dump.

    Args:
        path: Location of the dump file. Defaults to ``"cities500.txt"`` in
            the current working directory.

    Returns:
        The populated ``ByteTrie`` (created with ``multi_value=True``).

    Raises:
        Exception: Whatever the row access or the trie insert raised; the
            offending row is reported on stdout before re-raising.
    """
    t = ByteTrie(multi_value=True)
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # and pass newline="" as the csv module requires for file objects.
    with open(path, "r", encoding="utf-8", newline="") as f:
        # The data is plain tab-separated text, not quoted CSV: a stray '"'
        # in a field must not start a quoted section that swallows the
        # following columns.
        reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
        for i, row in enumerate(reader):
            try:
                t.insert(row[1].encode("utf-8"), row[17])
            except Exception:
                # Look the label up defensively so that a short row does not
                # raise a second error that hides the diagnostics.
                label = row[1] if len(row) > 1 else None
                print(f"Error in row {i}")
                print(f"Label: '{label}'")
                print(f"Type: {type(label)}")
                raise
    return t
|
||
|
|
||
|
def insert(trie):
    """Create a convenience inserter bound to *trie*.

    The returned callable accepts any number of strings and stores each one
    in *trie* with its UTF-8 encoding as the key and the string itself as
    the value. It shall only be used to insert strings, because every
    argument is ``.encode``-d.
    """
    def _insert(*vals):
        for text in vals:
            trie.insert(text.encode('utf-8'), text)

    return _insert
|
||
|
|
||
|
def load_simple_trie():
    """Return a small ByteTrie pre-filled with a fixed set of sample keys.

    The trie is created with ``ByteTrie()`` default arguments and populated
    through ``insert``, so every key is the UTF-8 encoding of its value.
    The keys share prefixes ("A", "AA", "AACDE...") and are inserted in the
    order listed below.
    """
    trie = ByteTrie()
    add = insert(trie)

    # Each tuple is one call to the inserter; order is significant.
    batches = (
        ("A",),
        ("AA", "AB"),
        ("ABCDE",),
        ("AACDEF", "AACDEGG", "AACDEH"),
    )
    for batch in batches:
        add(*batch)

    return trie
|
||
|
|
||
|
# This uses internal representations which are not supposed to
|
||
|
# be used as public API and are subject to change!
|
||
|
from bytetrie.bytetrie import ByteTrie, Node, Root, Child, Terminal
|
||
|
def geonames_to_dot(t: "ByteTrie"):
    """Render the trie *t* as Graphviz DOT source.

    The output is a ``strict digraph`` with a fixed preamble (transparent
    background, thin arrow-less edges, unlabeled nodes) followed by the
    statements produced by ``_geonames_node_to_dot`` for every child of
    ``t.root``. Hues are spread evenly over the root's children: the first
    child gets hue 0 and each following one is shifted by
    ``1 / len(t.root.children)``.

    An empty trie (root without children) yields just the preamble and the
    closing brace instead of failing with a division by zero.

    Args:
        t: The trie to render; only ``t.root`` and ``t.root.children`` are
            accessed here.

    Returns:
        The DOT document as a single string.
    """
    parts = ["""strict digraph {
    graph [
        bgcolor="transparent"
    ];

    edge [
        arrowhead="none",
        penwidth="0.05",
    ];

    node [
        label="",
        sep="2"
    ];

    root [shape="circle", width="0.4"]
"""]

    root_children = t.root.children
    if root_children:
        hue_inc = 1 / len(root_children)
        hue = 0
        for child in root_children:
            parts.append(_geonames_node_to_dot(t.root, child, 1, hue))
            hue += hue_inc

    parts.append("}")
    # Join once at the end rather than growing one big string piece by piece.
    return "".join(parts)
|
||
|
|
||
|
def _geonames_node_to_dot(p: "Node", n: "Child", depth, hue):
    """Return the DOT statements for the subtree rooted at *n*.

    For *n* and every descendant, two lines are produced in pre-order
    (a node first, then its children in iteration order): the edge from the
    parent to the node, and the node's own attribute statement. The color is
    written as ``"<hue>,<depth*0.1>,<constant>"``, so the second component
    grows with the depth of the node.

    The traversal is iterative and collects the lines in a list, so deep
    tries neither hit the recursion limit nor repeatedly copy the growing
    output string.

    Args:
        p: Parent of *n*; only ``p.dot_id()`` is used.
        n: Node to start from; ``dot_id()`` and ``children`` are used.
        depth: Depth assigned to *n*; children get ``depth + 1``.
        hue: First color component, shared by the whole subtree.

    Returns:
        The concatenated DOT lines, each terminated by a newline.
    """
    # NOTE(review): Graphviz documents H,S,V components as values in
    # [0, 1]; "85"/"50" and depth*0.1 for depth > 10 fall outside that
    # range. Left untouched to keep the rendered output unchanged -- confirm
    # the intended values.
    lines = []
    pending = [(p, n, depth)]
    while pending:
        parent, node, level = pending.pop()
        lines.append(
            f'{parent.dot_id()} -> {node.dot_id()} [color="{hue},{level*0.1},85"]\n'
        )
        lines.append(
            f'{node.dot_id()} [color="{hue},{level*0.1},50", shape="circle", width="0.1"]\n'
        )
        # Push children reversed so that they are popped, and therefore
        # emitted, in their original order.
        for child in reversed(list(node.children)):
            pending.append((node, child, level + 1))

    return "".join(lines)
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: load the GeoNames trie, render it to DOT and
    # store the result in "geo_dot.dot". The DOT text is fully built before
    # the output file is opened.
    dot_source = geonames_to_dot(load_geonames())
    with open("geo_dot.dot", "w") as dot_file:
        dot_file.write(dot_source)
|