-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdaily_readings.rb
More file actions
94 lines (71 loc) · 2.18 KB
/
daily_readings.rb
File metadata and controls
94 lines (71 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# frozen_string_literal: true
require 'nokogiri'
require 'httparty' # TODO, write classes with include HTTParty
require 'logger'
# require 'pg'
require_relative 'lib/element'
# config variables
DEBUG = 1
LOG_OUTPUT = 1
# Downloads one reading page and stores its title and body text in a file.
#
# reading_link - URL of the reading page, fetched with HTTParty.get
# output_file  - path of the file to write; opened in 'w' mode, so an
#                existing file is overwritten
#
# The title comes from the article's <h2> and the body from its <dl><dd>
# elements. When DEBUG is 1 both strings are also pretty-printed to stdout.
def log_reading_text(reading_link, output_file)
  page = Nokogiri::HTML(HTTParty.get(reading_link).body)
  title = page.search('#main #content section article h2').text.strip
  text = page.search('#main #content section article dl dd').text.strip

  # file layout: title on the first line, reading text after it
  File.open(output_file, 'w') { |file| file << title << "\n" << text }

  return unless DEBUG == 1

  pp title
  pp text
end
# MAIN
# Download the daily readings index page and parse it with Nokogiri.
URL = 'https://www.oca.org/readings/daily'
response = HTTParty.get(URL)
# debugging aid: pp response.code, response.message, response.headers.inspect
doc = Nokogiri::HTML(response.body)
# Report how many anchors the page holds, then print the page title.
puts "There are #{doc.search('a').size} links found"
puts doc.title
# A scripture reading link; adds nothing of its own on top of LinkElement
# (loaded from lib/element).
class ScriptureReading < LinkElement; end
# Search the document for all reading link elements.
# Anchors without an href are skipped: their ['href'] is nil, and calling
# #prepend on nil would abort the whole run with NoMethodError.
reading_anchors = doc.search('#main #content section ul li a').select { |anchor| anchor['href'] }

# Build one ScriptureReading per anchor: site-prefixed link plus the trimmed link text.
recent_readings = reading_anchors.map do |anchor|
  ScriptureReading.new(anchor['href'].prepend('https://www.oca.org'), anchor.text.strip)
end

# remove duplicate entries and sort the objects
# NOTE(review): uniq/sort rely on the equality and ordering LinkElement
# defines in lib/element — confirm they compare the way this script expects.
recent_readings = recent_readings.uniq.sort

# write the scraped readings to disk
File.open('readings_data.txt', 'w') do |file|
  recent_readings.each { |reading| file << reading.to_s }
end

# get final count
puts "#{recent_readings.size} Reading elements scraped"

# display for debugging purposes
pp recent_readings if DEBUG == 1

if LOG_OUTPUT == 1
  # fetch each reading and log it to its own file, numbered from 1
  recent_readings.each.with_index(1) do |reading, index|
    log_reading_text(reading.link, "reading_output_#{index}.txt")
  end
end