-
Notifications
You must be signed in to change notification settings - Fork 474
perf: Denormalise feature names on Code Reference scans and add GIN index #6844
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
matthewelwell
wants to merge
6
commits into
main
Choose a base branch
from
perf/further-feature-list-improvements
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+371
−15
Open
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
e76e86f
Add denormalised `feature_names` attribute for enhanced DB query perf…
matthewelwell 3c98f2f
Typing fixes
matthewelwell 752b4fc
Minor tidy up to simplify diff
matthewelwell 5bea66f
Add migration test
matthewelwell 5eae45b
Typing improvement
matthewelwell 2b03644
Another one...
matthewelwell File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| from typing import Any | ||
|
|
||
| from django.db.backends.base.base import BaseDatabaseWrapper | ||
| from django.db.models import BooleanField, Func | ||
| from django.db.models.sql.compiler import SQLCompiler | ||
|
|
||
|
|
||
| class ArrayContains(Func): | ||
| """Generates: array_col @> ARRAY[value]::text[] | ||
|
|
||
| Used to check whether a text array column contains a single expression | ||
| value, in a form that PostgreSQL can satisfy with a GIN index. The | ||
| standard ArrayField __contains lookup only accepts concrete Python values, | ||
| not ORM expressions such as F() or OuterRef(), hence this helper. | ||
| """ | ||
|
|
||
| output_field = BooleanField() | ||
|
|
||
| def as_sql( | ||
| self, | ||
| compiler: SQLCompiler, | ||
| connection: BaseDatabaseWrapper, | ||
| *_: Any, | ||
| **extra_context: Any, | ||
| ) -> tuple[str, list[str | int] | tuple[str | int, ...] | tuple[()]]: | ||
| array_expr, value_expr = self.source_expressions | ||
| array_sql, array_params = compiler.compile(array_expr) | ||
| value_sql, value_params = compiler.compile(value_expr) | ||
| return f"{array_sql} @> ARRAY[{value_sql}]::text[]", [ | ||
| *array_params, | ||
| *value_params, | ||
| ] |
35 changes: 35 additions & 0 deletions
35
api/projects/code_references/migrations/0003_add_feature_names.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| from django.apps.registry import Apps | ||
| from django.contrib.postgres.fields import ArrayField | ||
| from django.db import migrations, models | ||
| from django.db.backends.base.schema import BaseDatabaseSchemaEditor | ||
|
|
||
|
|
||
| def backfill_feature_names(apps: Apps, schema_editor: BaseDatabaseSchemaEditor) -> None: | ||
| FeatureFlagCodeReferencesScan = apps.get_model( | ||
| "code_references", "FeatureFlagCodeReferencesScan" | ||
| ) | ||
| scans = list(FeatureFlagCodeReferencesScan.objects.all()) | ||
| for scan in scans: | ||
| scan.feature_names = sorted( | ||
| {ref["feature_name"] for ref in scan.code_references} | ||
| ) | ||
| FeatureFlagCodeReferencesScan.objects.bulk_update(scans, ["feature_names"]) | ||
|
|
||
|
|
||
| class Migration(migrations.Migration): | ||
|
|
||
| dependencies = [ | ||
| ("code_references", "0002_add_project_repo_created_index"), | ||
| ] | ||
|
|
||
| operations = [ | ||
| migrations.AddField( | ||
| model_name="featureflagcodereferencesscan", | ||
| field=ArrayField(models.TextField(), default=list), | ||
| name="feature_names", | ||
| ), | ||
| migrations.RunPython( | ||
| backfill_feature_names, | ||
| reverse_code=migrations.RunPython.noop, | ||
| ), | ||
| ] | ||
34 changes: 34 additions & 0 deletions
34
api/projects/code_references/migrations/0004_add_feature_names_gin_index.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| from django.contrib.postgres.indexes import GinIndex | ||
| from django.db import migrations | ||
|
|
||
| from core.migration_helpers import PostgresOnlyRunSQL | ||
|
|
||
|
|
||
| class Migration(migrations.Migration): | ||
|
|
||
| atomic = False | ||
|
|
||
| dependencies = [ | ||
| ("code_references", "0003_add_feature_names"), | ||
| ] | ||
|
|
||
| operations = [ | ||
| migrations.SeparateDatabaseAndState( | ||
| state_operations=[ | ||
| migrations.AddIndex( | ||
| model_name="featureflagcodereferencesscan", | ||
| index=GinIndex( | ||
| fields=["feature_names"], | ||
| name="code_refs_feat_names_gin_idx", | ||
| ), | ||
| ), | ||
| ], | ||
| database_operations=[ | ||
| PostgresOnlyRunSQL( | ||
| 'CREATE INDEX CONCURRENTLY IF NOT EXISTS "code_refs_feat_names_gin_idx" ' | ||
| 'ON "code_references_featureflagcodereferencesscan" USING gin ("feature_names");', | ||
| reverse_sql='DROP INDEX CONCURRENTLY IF EXISTS "code_refs_feat_names_gin_idx"', | ||
| ), | ||
| ], | ||
| ), | ||
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
86 changes: 86 additions & 0 deletions
86
api/tests/unit/projects/code_references/test_unit_projects_code_references_db_helpers.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| from typing import Iterator | ||
|
|
||
| import pytest | ||
| from django.contrib.postgres.fields import ArrayField | ||
| from django.db import connection, models | ||
| from django.db.models import F, Value | ||
| from django.test.utils import isolate_apps | ||
|
|
||
| from projects.code_references.db_helpers import ArrayContains | ||
|
|
||
|
|
||
| @pytest.fixture() | ||
| def names_model(db: None) -> Iterator[type[models.Model]]: | ||
| with isolate_apps("projects.code_references"): | ||
|
|
||
| class NamesModel(models.Model): | ||
| names = ArrayField(models.TextField(), default=list) | ||
|
|
||
| class Meta: | ||
| app_label = "projects.code_references" | ||
|
|
||
| with connection.schema_editor() as editor: | ||
| editor.create_model(NamesModel) | ||
|
|
||
| yield NamesModel | ||
|
|
||
|
|
||
| def test_ArrayContains__matches_when_value_present_in_array( | ||
| names_model: models.Model, | ||
| ) -> None: | ||
| # Given | ||
| names_model.objects.create(names=["john", "esme"]) # type: ignore[attr-defined] | ||
|
|
||
| # When | ||
| result = names_model.objects.annotate( # type: ignore[attr-defined] | ||
| has_name=ArrayContains(F("names"), Value("esme")), | ||
| ).get() | ||
|
|
||
| # Then | ||
| assert result.has_name is True | ||
|
|
||
|
|
||
| def test_ArrayContains__does_not_match_when_value_absent_from_array( | ||
| names_model: models.Model, | ||
| ) -> None: | ||
| # Given | ||
| names_model.objects.create(names=["john"]) # type: ignore[attr-defined] | ||
|
|
||
| # When | ||
| result = names_model.objects.annotate( # type: ignore[attr-defined] | ||
| has_name=ArrayContains(F("names"), Value("lisa")), | ||
| ).get() | ||
|
|
||
| # Then | ||
| assert result.has_name is False | ||
|
|
||
|
|
||
| def test_ArrayContains__filters_queryset_correctly( | ||
| names_model: models.Model, | ||
| ) -> None: | ||
| # Given | ||
| matching = names_model.objects.create(names=["john", "esme"]) # type: ignore[attr-defined] | ||
| names_model.objects.create(names=["lisa"]) # type: ignore[attr-defined] | ||
|
|
||
| # When | ||
| results = names_model.objects.annotate( # type: ignore[attr-defined] | ||
| has_name=ArrayContains(F("names"), Value("john")), | ||
| ).filter(has_name=True) | ||
|
|
||
| # Then | ||
| assert list(results) == [matching] | ||
|
|
||
|
|
||
| def test_ArrayContains__does_not_match_empty_array( | ||
| names_model: models.Model, | ||
| ) -> None: | ||
| # Given | ||
| names_model.objects.create(names=[]) # type: ignore[attr-defined] | ||
|
|
||
| # When | ||
| result = names_model.objects.annotate( # type: ignore[attr-defined] | ||
| has_name=ArrayContains(F("names"), Value("kiefer")), | ||
| ).get() | ||
|
|
||
| # Then | ||
| assert result.has_name is False |
54 changes: 54 additions & 0 deletions
54
api/tests/unit/projects/code_references/test_unit_projects_code_references_migrations.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| from django_test_migrations.migrator import Migrator | ||
|
|
||
|
|
||
| def test_backfill_feature_names(migrator: Migrator) -> None: | ||
| # Given | ||
| old_state = migrator.apply_initial_migration( | ||
| ("code_references", "0002_add_project_repo_created_index") | ||
| ) | ||
|
|
||
| Organisation = old_state.apps.get_model("organisations", "Organisation") | ||
| Project = old_state.apps.get_model("projects", "Project") | ||
| FeatureFlagCodeReferencesScan = old_state.apps.get_model( | ||
| "code_references", "FeatureFlagCodeReferencesScan" | ||
| ) | ||
|
|
||
| organisation = Organisation.objects.create(name="Test Organisation") | ||
| project = Project.objects.create(name="Test Project", organisation=organisation) | ||
|
|
||
| # A scan with references to multiple features (unsorted) including a duplicate | ||
| scan_with_references = FeatureFlagCodeReferencesScan.objects.create( | ||
| project=project, | ||
| repository_url="https://github.com/example/repo", | ||
| revision="abc123", | ||
| code_references=[ | ||
| {"feature_name": "zebra_flag", "file_path": "foo.py", "line_number": 1}, | ||
| {"feature_name": "alpha_flag", "file_path": "bar.py", "line_number": 2}, | ||
| {"feature_name": "zebra_flag", "file_path": "baz.py", "line_number": 3}, | ||
| ], | ||
| ) | ||
|
|
||
| # A scan with no code references | ||
| scan_with_no_references = FeatureFlagCodeReferencesScan.objects.create( | ||
| project=project, | ||
| repository_url="https://github.com/example/repo", | ||
| revision="def456", | ||
| code_references=[], | ||
| ) | ||
|
|
||
| # When | ||
| new_state = migrator.apply_tested_migration( | ||
| ("code_references", "0003_add_feature_names") | ||
| ) | ||
| NewScan = new_state.apps.get_model( | ||
| "code_references", "FeatureFlagCodeReferencesScan" | ||
| ) | ||
|
|
||
| # Then | ||
| # Duplicates are removed and names are sorted | ||
| updated_scan = NewScan.objects.get(id=scan_with_references.id) | ||
| assert updated_scan.feature_names == ["alpha_flag", "zebra_flag"] | ||
|
|
||
| # Empty code_references results in empty feature_names | ||
| updated_scan_no_refs = NewScan.objects.get(id=scan_with_no_references.id) | ||
| assert updated_scan_no_refs.feature_names == [] |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fairly sure there are other fields in this app that don't support oracle, but we probably need to think on how to exclude this from EE.