Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- Store start and end date for courses (#7783)
- Split courses into Current and Past sections for all users (#7801)
- Batch the calculation of TA grading statistics on assignments index page (#7787)
- Improve git repo access time, by reducing db queries (#7791)

### 🐛 Bug fixes
- Prevent "No rows found" message from displaying in tables when data is loading (#7790)
Expand Down
123 changes: 94 additions & 29 deletions app/lib/repository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -224,28 +224,6 @@ def self.update_permissions_after(only_on_request: false, &block)
nil
end

# Returns the assignments for which students have repository access.
#
# Repository authentication subtleties:
# 1) a repository is associated with a Group, but..
# 2) ..students are associated with a Grouping (an "instance" of Group for a specific Assignment)
# That creates a problem since authentication in git is at the repository level, while Markus handles it at
# the assignment level, allowing the same Group repo to have different students according to the assignment.
# The two extremes to implement it are using the union of all students (permissive) or the intersection
# (restrictive). Instead, we are going to take a last-deadline approach, where we assume that the valid students at
# any point in time are the ones valid for the last assignment due. (Basically, it's nice for a group to share a
# repo among assignments, but at a certain point during the course we may want to add or [more frequently] remove
# some students from it)
def self.get_repo_auth_records
records = Assignment.joins(:assignment_properties, :course)
.includes(groupings: [:group, { accepted_students: :section }])
.where(assignment_properties: { vcs_submit: true }, 'courses.is_hidden': false)
.order(due_date: :desc)
records.where(assignment_properties: { is_timed: false })
.or(records.where.not(groupings: { start_time: nil }))
.or(records.where(groupings: { start_time: nil }, due_date: Time.utc(0)..Time.current))
end

# Return a nested hash of the form { assignment_id => { section_id => visibility } } where visibility
# is a boolean indicating whether the given assignment is visible to the given section.
def self.visibility_hash
Expand Down Expand Up @@ -308,14 +286,13 @@ def self.get_all_permissions
instructors.each do |course_name, instructor_names|
permissions[File.join(course_name, '*')] = instructor_names
end
self.get_repo_auth_records.each do |assignment|
assignment.valid_groupings.each do |valid_grouping|
next unless visibility[assignment.id][valid_grouping.inviter&.section&.id]
repo_name = valid_grouping.group.repository_relative_path
accepted_students = valid_grouping.accepted_students.where('roles.hidden': false).map(&:user_name)
permissions[repo_name] = accepted_students
end

# Bulk query for student permissions (optimized to avoid N+1)
student_permissions = get_student_permissions_bulk(visibility)
student_permissions.each do |repo_path, user_names|
permissions[repo_path] = user_names
end

# NOTE: this will allow graders to access the files in the entire repository
# even if they are the grader for only a single assignment
graders_info = TaMembership.joins(role: [:user, :course],
Expand All @@ -329,6 +306,94 @@ def self.get_all_permissions
permissions
end

# Bulk query to get student permissions without N+1 queries.
# Returns a hash of { repo_path => [user_names] }
def self.get_student_permissions_bulk(visibility)
current_time = Time.current
accepted_statuses = [StudentMembership::STATUSES[:accepted], StudentMembership::STATUSES[:inviter]]
rejected_status = StudentMembership::STATUSES[:rejected]
inviter_status = StudentMembership::STATUSES[:inviter]

# Step 1: Get membership counts per grouping for is_valid? check
# (grouping is valid if instructor_approved OR non_rejected_count >= group_min)
membership_counts = StudentMembership
.where.not(membership_status: rejected_status)
.group(:grouping_id)
.count

# Step 2: Get inviter section_id for each grouping (for visibility check)
inviter_sections = StudentMembership
.joins(:role)
.where(membership_status: inviter_status)
.pluck(:grouping_id, 'roles.section_id')
.to_h

# Step 3: Bulk query for all relevant data
# Timed assignment filter: non-timed OR started OR (not started AND past due date)
timed_filter = <<~SQL.squish
(assignment_properties.is_timed = false
OR groupings.start_time IS NOT NULL
OR (groupings.start_time IS NULL AND assessments.due_date <= :current_time))
SQL

raw_data = Assignment
.joins(:assignment_properties, :course)
.joins(groupings: [:group, { accepted_student_memberships: { role: :user } }])
.where(assignment_properties: { vcs_submit: true })
.where('courses.is_hidden': false)
.where('roles.hidden': false)
.where(memberships: { membership_status: accepted_statuses })
.where(timed_filter, current_time: current_time)
.order(due_date: :desc)
.pluck(
'assessments.id',
'groupings.id',
'groupings.instructor_approved',
'assignment_properties.group_min',
'courses.name',
'groups.repo_name',
'users.user_name'
)

# Step 4: Process results in Ruby (now O(n) iteration, not O(n) DB queries)
# Group by assignment first to preserve due_date DESC ordering (last-deadline approach)
permissions = Hash.new { |h, k| h[k] = [] }
processed_repos = Set.new

# Group by assignment_id first (preserves due_date ordering), then by grouping_id
by_assignment = raw_data.group_by { |row| row[0] } # group by assignment_id

by_assignment.each do |assignment_id, assignment_rows|
by_grouping = assignment_rows.group_by { |row| row[1] } # group by grouping_id

by_grouping.each do |grouping_id, rows|
first_row = rows.first
instructor_approved = first_row[2]
group_min = first_row[3]
course_name = first_row[4]
repo_name = first_row[5]
repo_path = File.join(course_name, repo_name)

# Last-deadline approach: skip if repo already processed by earlier (later due_date) assignment
next if processed_repos.include?(repo_path)

# Check if grouping is valid (same logic as Grouping#is_valid?)
non_rejected_count = membership_counts[grouping_id] || 0
is_valid = instructor_approved || non_rejected_count >= group_min
next unless is_valid

# Check visibility based on inviter's section
inviter_section_id = inviter_sections[grouping_id]
next unless visibility[assignment_id][inviter_section_id]

processed_repos << repo_path
permissions[repo_path] = rows.map { |row| row[6] }.uniq
end
end

permissions
end

# '*' which is reserved to indicate all repos when setting permissions
# TODO: add to this if needed
def self.reserved_locations
Expand Down
139 changes: 139 additions & 0 deletions lib/tasks/benchmark.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Benchmark test data for performance testing.
#
# Usage:
# rake markus:benchmark:create
# NUM_ASSIGNMENTS=10 GROUPINGS=100 rake markus:benchmark:create
# rake markus:benchmark:cleanup
#
# Tests repository permissions (get_student_permissions_bulk) and
# TA statistics (cache_ta_results).

require 'factory_bot_rails'

namespace :markus do
namespace :benchmark do
DATA_FILE = Rails.root.join('tmp/benchmark_data.json')

desc 'Create benchmark test data'
task create: :environment do
num_assignments = ENV.fetch('NUM_ASSIGNMENTS', '5').to_i
num_groupings = ENV.fetch('GROUPINGS', '50').to_i
complete_ratio = ENV.fetch('COMPLETE_RATIO', '0.5').to_f
timestamp = Time.now.to_i

course = Course.first
abort 'No course found. Run db:seed first.' unless course

puts "Creating #{num_assignments} assignments x #{num_groupings} groupings..."

existing_ta = course.tas.first
ta = existing_ta || FactoryBot.create(:ta, course: course)
ta_created = existing_ta.nil?
assignment_ids = []

ActiveRecord::Base.transaction do
num_assignments.times do |i|
assignment = FactoryBot.create(
:assignment,
course: course,
short_identifier: "BENCH#{i}_#{timestamp}",
due_date: 1.week.ago,
assignment_properties_attributes: { vcs_submit: true }
)
assignment_ids << assignment.id

FactoryBot.create(:rubric_criterion, assignment: assignment)

num_groupings.times do |j|
group = FactoryBot.create(
:group,
course: course,
group_name: "bench_#{i}_#{j}_#{timestamp}"
)
grouping = FactoryBot.create(
:grouping_with_inviter,
assignment: assignment,
group: group
)
FactoryBot.create(:ta_membership, grouping: grouping, role: ta)

submission = FactoryBot.create(:version_used_submission, grouping: grouping)
grouping.update_column(:is_collected, true)

result = submission.current_result
result.create_marks

if rand < complete_ratio
result.marks.each { |m| m.update_column(:mark, rand(0..m.criterion.max_mark.to_i)) }
result.update_column(:marking_state, Result::MARKING_STATES[:complete])
end
end
print '.'
end
end

puts "\nDone."
FileUtils.mkdir_p(File.dirname(DATA_FILE))
File.write(DATA_FILE, { assignment_ids: assignment_ids, ta_id: ta.id, ta_created: ta_created }.to_json)
end

desc 'Remove benchmark test data'
task cleanup: :environment do
unless File.exist?(DATA_FILE)
puts 'No benchmark data found.'
next
end

data = JSON.parse(File.read(DATA_FILE), symbolize_names: true)
assignment_ids = data[:assignment_ids]

puts "Removing #{assignment_ids.size} assignments..."

ActiveRecord::Base.transaction do
# Get IDs before deleting
group_ids = Grouping.where(assessment_id: assignment_ids).pluck(:group_id)
result_ids = Result.joins(submission: :grouping)
.where(groupings: { assessment_id: assignment_ids }).ids

# Get student role and user IDs before deleting memberships
student_role_ids = StudentMembership.joins(:grouping)
.where(groupings: { assessment_id: assignment_ids })
.pluck(:role_id)
student_user_ids = Role.where(id: student_role_ids).pluck(:user_id)

# Delete in FK order
Mark.where(result_id: result_ids).delete_all
Result.where(id: result_ids).delete_all
Submission.joins(:grouping).where(groupings: { assessment_id: assignment_ids }).delete_all
Membership.joins(:grouping).where(groupings: { assessment_id: assignment_ids }).delete_all
Grouping.where(assessment_id: assignment_ids).delete_all

Criterion.where(assessment_id: assignment_ids).find_each { |c| c.levels.delete_all }
Criterion.where(assessment_id: assignment_ids).delete_all
AssignmentProperties.where(assessment_id: assignment_ids).delete_all
SubmissionRule.where(assessment_id: assignment_ids).delete_all
Assignment.where(id: assignment_ids).delete_all
Group.where(id: group_ids).delete_all

# Delete student roles and their users
GradeEntryStudent.where(role_id: student_role_ids).delete_all
Role.where(id: student_role_ids).delete_all
User.where(id: student_user_ids).delete_all

# Delete TA if it was created by benchmark
if data[:ta_created]
ta = Role.find_by(id: data[:ta_id])
if ta
ta_user_id = ta.user_id
GraderPermission.where(role_id: ta.id).delete_all
ta.delete
User.where(id: ta_user_id).delete
end
end
end

File.delete(DATA_FILE)
puts 'Done.'
end
end
end
Loading