Skip to content

Commit 49f1a29

Browse files
authored
Improved efficiency of repo permissions update (#7791)
1 parent 5ec9f19 commit 49f1a29

4 files changed

Lines changed: 423 additions & 106 deletions

File tree

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
- Store start and end date for courses (#7783)
1515
- Split courses into Current and Past sections for all users (#7801)
1616
- Batch the calculation of TA grading statistics on assignments index page (#7787)
17+
- Improve git repo access time, by reducing db queries (#7791)
1718

1819
### 🐛 Bug fixes
1920
- Prevent "No rows found" message from displaying in tables when data is loading (#7790)

app/lib/repository.rb

Lines changed: 94 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -224,28 +224,6 @@ def self.update_permissions_after(only_on_request: false, &block)
224224
nil
225225
end
226226

227-
# Returns the assignments for which students have repository access.
228-
#
229-
# Repository authentication subtleties:
230-
# 1) a repository is associated with a Group, but..
231-
# 2) ..students are associated with a Grouping (an "instance" of Group for a specific Assignment)
232-
# That creates a problem since authentication in git is at the repository level, while Markus handles it at
233-
# the assignment level, allowing the same Group repo to have different students according to the assignment.
234-
# The two extremes to implement it are using the union of all students (permissive) or the intersection
235-
# (restrictive). Instead, we are going to take a last-deadline approach, where we assume that the valid students at
236-
# any point in time are the ones valid for the last assignment due. (Basically, it's nice for a group to share a
237-
# repo among assignments, but at a certain point during the course we may want to add or [more frequently] remove
238-
# some students from it)
239-
def self.get_repo_auth_records
240-
records = Assignment.joins(:assignment_properties, :course)
241-
.includes(groupings: [:group, { accepted_students: :section }])
242-
.where(assignment_properties: { vcs_submit: true }, 'courses.is_hidden': false)
243-
.order(due_date: :desc)
244-
records.where(assignment_properties: { is_timed: false })
245-
.or(records.where.not(groupings: { start_time: nil }))
246-
.or(records.where(groupings: { start_time: nil }, due_date: Time.utc(0)..Time.current))
247-
end
248-
249227
# Return a nested hash of the form { assignment_id => { section_id => visibility } } where visibility
250228
# is a boolean indicating whether the given assignment is visible to the given section.
251229
def self.visibility_hash
@@ -308,14 +286,13 @@ def self.get_all_permissions
308286
instructors.each do |course_name, instructor_names|
309287
permissions[File.join(course_name, '*')] = instructor_names
310288
end
311-
self.get_repo_auth_records.each do |assignment|
312-
assignment.valid_groupings.each do |valid_grouping|
313-
next unless visibility[assignment.id][valid_grouping.inviter&.section&.id]
314-
repo_name = valid_grouping.group.repository_relative_path
315-
accepted_students = valid_grouping.accepted_students.where('roles.hidden': false).map(&:user_name)
316-
permissions[repo_name] = accepted_students
317-
end
289+
290+
# Bulk query for student permissions (optimized to avoid N+1)
291+
student_permissions = get_student_permissions_bulk(visibility)
292+
student_permissions.each do |repo_path, user_names|
293+
permissions[repo_path] = user_names
318294
end
295+
319296
# NOTE: this will allow graders to access the files in the entire repository
320297
# even if they are the grader for only a single assignment
321298
graders_info = TaMembership.joins(role: [:user, :course],
@@ -329,6 +306,94 @@ def self.get_all_permissions
329306
permissions
330307
end
331308

309+
# Bulk query to get student permissions without N+1 queries.
310+
# Returns a hash of { repo_path => [user_names] }
311+
def self.get_student_permissions_bulk(visibility)
312+
current_time = Time.current
313+
accepted_statuses = [StudentMembership::STATUSES[:accepted], StudentMembership::STATUSES[:inviter]]
314+
rejected_status = StudentMembership::STATUSES[:rejected]
315+
inviter_status = StudentMembership::STATUSES[:inviter]
316+
317+
# Step 1: Get membership counts per grouping for is_valid? check
318+
# (grouping is valid if instructor_approved OR non_rejected_count >= group_min)
319+
membership_counts = StudentMembership
320+
.where.not(membership_status: rejected_status)
321+
.group(:grouping_id)
322+
.count
323+
324+
# Step 2: Get inviter section_id for each grouping (for visibility check)
325+
inviter_sections = StudentMembership
326+
.joins(:role)
327+
.where(membership_status: inviter_status)
328+
.pluck(:grouping_id, 'roles.section_id')
329+
.to_h
330+
331+
# Step 3: Bulk query for all relevant data
332+
# Timed assignment filter: non-timed OR started OR (not started AND past due date)
333+
timed_filter = <<~SQL.squish
334+
(assignment_properties.is_timed = false
335+
OR groupings.start_time IS NOT NULL
336+
OR (groupings.start_time IS NULL AND assessments.due_date <= :current_time))
337+
SQL
338+
339+
raw_data = Assignment
340+
.joins(:assignment_properties, :course)
341+
.joins(groupings: [:group, { accepted_student_memberships: { role: :user } }])
342+
.where(assignment_properties: { vcs_submit: true })
343+
.where('courses.is_hidden': false)
344+
.where('roles.hidden': false)
345+
.where(memberships: { membership_status: accepted_statuses })
346+
.where(timed_filter, current_time: current_time)
347+
.order(due_date: :desc)
348+
.pluck(
349+
'assessments.id',
350+
'groupings.id',
351+
'groupings.instructor_approved',
352+
'assignment_properties.group_min',
353+
'courses.name',
354+
'groups.repo_name',
355+
'users.user_name'
356+
)
357+
358+
# Step 4: Process results in Ruby (now O(n) iteration, not O(n) DB queries)
359+
# Group by assignment first to preserve due_date DESC ordering (last-deadline approach)
360+
permissions = Hash.new { |h, k| h[k] = [] }
361+
processed_repos = Set.new
362+
363+
# Group by assignment_id first (preserves due_date ordering), then by grouping_id
364+
by_assignment = raw_data.group_by { |row| row[0] } # group by assignment_id
365+
366+
by_assignment.each do |assignment_id, assignment_rows|
367+
by_grouping = assignment_rows.group_by { |row| row[1] } # group by grouping_id
368+
369+
by_grouping.each do |grouping_id, rows|
370+
first_row = rows.first
371+
instructor_approved = first_row[2]
372+
group_min = first_row[3]
373+
course_name = first_row[4]
374+
repo_name = first_row[5]
375+
repo_path = File.join(course_name, repo_name)
376+
377+
# Last-deadline approach: skip if repo already processed by earlier (later due_date) assignment
378+
next if processed_repos.include?(repo_path)
379+
380+
# Check if grouping is valid (same logic as Grouping#is_valid?)
381+
non_rejected_count = membership_counts[grouping_id] || 0
382+
is_valid = instructor_approved || non_rejected_count >= group_min
383+
next unless is_valid
384+
385+
# Check visibility based on inviter's section
386+
inviter_section_id = inviter_sections[grouping_id]
387+
next unless visibility[assignment_id][inviter_section_id]
388+
389+
processed_repos << repo_path
390+
permissions[repo_path] = rows.map { |row| row[6] }.uniq
391+
end
392+
end
393+
394+
permissions
395+
end
396+
332397
# '*' which is reserved to indicate all repos when setting permissions
333398
# TODO: add to this if needed
334399
def self.reserved_locations

lib/tasks/benchmark.rake

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Benchmark test data for performance testing.
2+
#
3+
# Usage:
4+
# rake markus:benchmark:create
5+
# NUM_ASSIGNMENTS=10 GROUPINGS=100 rake markus:benchmark:create
6+
# rake markus:benchmark:cleanup
7+
#
8+
# Tests repository permissions (get_student_permissions_bulk) and
9+
# TA statistics (cache_ta_results).
10+
11+
require 'factory_bot_rails'
12+
13+
namespace :markus do
14+
namespace :benchmark do
15+
DATA_FILE = Rails.root.join('tmp/benchmark_data.json')
16+
17+
desc 'Create benchmark test data'
18+
task create: :environment do
19+
num_assignments = ENV.fetch('NUM_ASSIGNMENTS', '5').to_i
20+
num_groupings = ENV.fetch('GROUPINGS', '50').to_i
21+
complete_ratio = ENV.fetch('COMPLETE_RATIO', '0.5').to_f
22+
timestamp = Time.now.to_i
23+
24+
course = Course.first
25+
abort 'No course found. Run db:seed first.' unless course
26+
27+
puts "Creating #{num_assignments} assignments x #{num_groupings} groupings..."
28+
29+
existing_ta = course.tas.first
30+
ta = existing_ta || FactoryBot.create(:ta, course: course)
31+
ta_created = existing_ta.nil?
32+
assignment_ids = []
33+
34+
ActiveRecord::Base.transaction do
35+
num_assignments.times do |i|
36+
assignment = FactoryBot.create(
37+
:assignment,
38+
course: course,
39+
short_identifier: "BENCH#{i}_#{timestamp}",
40+
due_date: 1.week.ago,
41+
assignment_properties_attributes: { vcs_submit: true }
42+
)
43+
assignment_ids << assignment.id
44+
45+
FactoryBot.create(:rubric_criterion, assignment: assignment)
46+
47+
num_groupings.times do |j|
48+
group = FactoryBot.create(
49+
:group,
50+
course: course,
51+
group_name: "bench_#{i}_#{j}_#{timestamp}"
52+
)
53+
grouping = FactoryBot.create(
54+
:grouping_with_inviter,
55+
assignment: assignment,
56+
group: group
57+
)
58+
FactoryBot.create(:ta_membership, grouping: grouping, role: ta)
59+
60+
submission = FactoryBot.create(:version_used_submission, grouping: grouping)
61+
grouping.update_column(:is_collected, true)
62+
63+
result = submission.current_result
64+
result.create_marks
65+
66+
if rand < complete_ratio
67+
result.marks.each { |m| m.update_column(:mark, rand(0..m.criterion.max_mark.to_i)) }
68+
result.update_column(:marking_state, Result::MARKING_STATES[:complete])
69+
end
70+
end
71+
print '.'
72+
end
73+
end
74+
75+
puts "\nDone."
76+
FileUtils.mkdir_p(File.dirname(DATA_FILE))
77+
File.write(DATA_FILE, { assignment_ids: assignment_ids, ta_id: ta.id, ta_created: ta_created }.to_json)
78+
end
79+
80+
desc 'Remove benchmark test data'
81+
task cleanup: :environment do
82+
unless File.exist?(DATA_FILE)
83+
puts 'No benchmark data found.'
84+
next
85+
end
86+
87+
data = JSON.parse(File.read(DATA_FILE), symbolize_names: true)
88+
assignment_ids = data[:assignment_ids]
89+
90+
puts "Removing #{assignment_ids.size} assignments..."
91+
92+
ActiveRecord::Base.transaction do
93+
# Get IDs before deleting
94+
group_ids = Grouping.where(assessment_id: assignment_ids).pluck(:group_id)
95+
result_ids = Result.joins(submission: :grouping)
96+
.where(groupings: { assessment_id: assignment_ids }).ids
97+
98+
# Get student role and user IDs before deleting memberships
99+
student_role_ids = StudentMembership.joins(:grouping)
100+
.where(groupings: { assessment_id: assignment_ids })
101+
.pluck(:role_id)
102+
student_user_ids = Role.where(id: student_role_ids).pluck(:user_id)
103+
104+
# Delete in FK order
105+
Mark.where(result_id: result_ids).delete_all
106+
Result.where(id: result_ids).delete_all
107+
Submission.joins(:grouping).where(groupings: { assessment_id: assignment_ids }).delete_all
108+
Membership.joins(:grouping).where(groupings: { assessment_id: assignment_ids }).delete_all
109+
Grouping.where(assessment_id: assignment_ids).delete_all
110+
111+
Criterion.where(assessment_id: assignment_ids).find_each { |c| c.levels.delete_all }
112+
Criterion.where(assessment_id: assignment_ids).delete_all
113+
AssignmentProperties.where(assessment_id: assignment_ids).delete_all
114+
SubmissionRule.where(assessment_id: assignment_ids).delete_all
115+
Assignment.where(id: assignment_ids).delete_all
116+
Group.where(id: group_ids).delete_all
117+
118+
# Delete student roles and their users
119+
GradeEntryStudent.where(role_id: student_role_ids).delete_all
120+
Role.where(id: student_role_ids).delete_all
121+
User.where(id: student_user_ids).delete_all
122+
123+
# Delete TA if it was created by benchmark
124+
if data[:ta_created]
125+
ta = Role.find_by(id: data[:ta_id])
126+
if ta
127+
ta_user_id = ta.user_id
128+
GraderPermission.where(role_id: ta.id).delete_all
129+
ta.delete
130+
User.where(id: ta_user_id).delete
131+
end
132+
end
133+
end
134+
135+
File.delete(DATA_FILE)
136+
puts 'Done.'
137+
end
138+
end
139+
end

0 commit comments

Comments
 (0)