This repository was archived by the owner on Feb 16, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.rb
More file actions
executable file
·73 lines (60 loc) · 1.95 KB
/
parser.rb
File metadata and controls
executable file
·73 lines (60 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env ruby
require 'bundler/setup'
require 'elasticsearch'
# Directory holding the converted Markdown files; it is the `converted`
# folder located next to this script.
CONVERTED_DIR = File.expand_path('../converted', __FILE__)
# Name of the Elasticsearch index this script deletes, recreates and fills.
INDEX_NAME = 'agenda_with_chapters'
# Build the Elasticsearch client. Host and port are taken from the
# ELASTICSEARCH_1_PORT_9200_TCP_* environment variables when they are set
# (presumably injected by a linked Docker container - confirm) and fall back
# to localhost:9200 otherwise.
es_host = ENV.fetch('ELASTICSEARCH_1_PORT_9200_TCP_ADDR', 'localhost')
es_port = ENV.fetch('ELASTICSEARCH_1_PORT_9200_TCP_PORT', 9200)
client = Elasticsearch::Client.new(hosts: [{ host: es_host, port: es_port }])
# Drop any previous copy of the index so the import always starts from a
# clean slate.
puts "Deleting index #{INDEX_NAME} from Database"
# `ignore: 404` keeps a first run (when the index does not exist yet) from
# aborting the script with a NotFound error before anything is imported.
client.indices.delete index: INDEX_NAME, ignore: 404
# https://github.com/elasticsearch/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/indices/create.rb#L10
# http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-core-types.html
# http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/index-modules-similarity.html
# http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html
# Field definitions for the `chapter` document type:
# - party:    string, indexed as `not_analyzed`
# - position: integer
# - text:     string, analyzed with the `german` analyzer
# - title:    string with the default settings
chapter_properties = {
  party:    { type: 'string', index: 'not_analyzed' },
  position: { type: 'integer' },
  text:     { type: 'string', analyzer: 'german' },
  title:    { type: 'string' }
}

# Create the index with the mapping above.
client.indices.create(
  index: INDEX_NAME,
  body: { mappings: { chapter: { properties: chapter_properties } } }
)
# Splits the lines of one converted Markdown file into chapters.
#
# A chapter starts at a level-2 heading (`## Title`); every following line up
# to the next level-2 heading becomes its text. Lines that appear before the
# first heading are discarded.
#
# @param lines [Enumerable<String>] the lines of the file, line endings included
# @return [Array<Array(String, String)>] `[title, text]` pairs in file order
def parse_chapters(lines)
  chapters = []
  title = nil
  text = ''
  lines.each do |line|
    if line =~ /^##[^#](.+)/
      # A new heading closes the chapter collected so far (if there is one).
      chapters << [title, text] unless title.nil?
      title = Regexp.last_match(1).strip
      text = ''
    else
      text << line
    end
  end
  # Flush the chapter still being collected when the file ends; without this
  # the final chapter of every file would be silently dropped.
  chapters << [title, text] unless title.nil?
  chapters
end

# Document ids are assigned sequentially across all parties, starting at 1.
id = 1
# Entries are sorted so that ids are assigned in the same order on every run;
# the raw directory listing order depends on the filesystem.
Dir.entries(CONVERTED_DIR).sort.each do |file|
  next if file.start_with?('.') # skips '.', '..' and hidden files

  # The party name is the file name without its `.md` extension.
  party = File.basename(file, '.md')
  puts "Party: #{party}"

  chapters = parse_chapters(File.readlines(File.join(CONVERTED_DIR, file)))

  puts "Adding #{chapters.size} #{party} chapters to Elasticsearch"
  # `position` is the 1-based position of the chapter within its party's file.
  chapters.each.with_index(1) do |(chapter_title, chapter_text), position|
    puts "ID #{id} chapter #{position}: #{chapter_title}"
    client.index index: INDEX_NAME, type: 'chapter', id: id,
                 body: { party: party, position: position, title: chapter_title, text: chapter_text }
    id += 1
  end
end
puts "Done!"