Skip to content

Commit ff45540

Browse files
authored
Merge pull request #11 from SamhammerAG/KIT-3065
KIT 3065 Update & Maintenance
2 parents 23cad5b + d3dbef2 commit ff45540

18 files changed

Lines changed: 116 additions & 120 deletions

.devcontainer/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# https://github.com/devcontainers/images/tree/main/src/python
2-
FROM mcr.microsoft.com/devcontainers/python:3.8-bullseye
2+
FROM mcr.microsoft.com/devcontainers/python:3.8-bookworm
33

44
# Uninstall pre-installed formatting and linting tools
55
# They would conflict with our pinned versions

.devcontainer/devcontainer.json

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,32 @@
1010
"python.pythonPath": "/usr/local/bin/python",
1111
"python.defaultInterpreterPath": "/usr/local/bin/python",
1212
"python.languageServer": "Pylance",
13-
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
14-
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
15-
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
16-
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
17-
"python.linting.flake8Path": "/home/vscode/.local/bin/flake8",
18-
"flake8.importStrategy": "fromEnvironment",
19-
"python.linting.mypyPath": "/home/vscode/.local/bin/mypy",
20-
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
21-
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
22-
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
13+
"flake8.path": [
14+
"/usr/local/py-utils/bin/bandit",
15+
"/usr/local/py-utils/bin/pydocstyle"
16+
],
17+
"flake8.importStrategy": "fromEnvironment"
2318
},
2419
"extensions": [
25-
"ms-python.python",
26-
"ms-python.vscode-pylance",
27-
"ms-python.flake8",
20+
"AykutSarac.jsoncrack-vscode",
21+
"eamodio.gitlens",
22+
"Gruntfuggly.todo-tree",
2823
"matangover.mypy",
24+
"ms-python.flake8",
2925
"ms-python.isort",
30-
"usernamehw.errorlens",
31-
"sourcery.sourcery",
32-
"njqdev.vscode-python-typehint",
26+
"ms-python.mypy-type-checker",
27+
"ms-python.pylint",
28+
"ms-python.python",
29+
"ms-python.vscode-pylance",
3330
"njpwerner.autodocstring",
31+
"njqdev.vscode-python-typehint",
3432
"redhat.vscode-yaml",
35-
"visualstudioexptteam.vscodeintellicode",
36-
"kaih2o.python-resource-monitor",
37-
"geeebe.duplicate",
38-
"oderwat.indent-rainbow",
39-
"shardulm94.trailing-spaces",
40-
"streetsidesoftware.code-spell-checker",
4133
"ryanluker.vscode-coverage-gutters",
4234
"spmeesseman.vscode-taskexplorer",
43-
"eamodio.gitlens",
35+
"streetsidesoftware.code-spell-checker",
4436
"tamasfe.even-better-toml",
45-
"AykutSarac.jsoncrack-vscode",
46-
"yzhang.markdown-all-in-one",
47-
"Gruntfuggly.todo-tree"
37+
"visualstudioexptteam.vscodeintellicode",
38+
"yzhang.markdown-all-in-one"
4839
]
4940
}
5041
},

.pylintrc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[FORMAT]
2+
max-line-length=120

.vscode/settings.json

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,44 +3,47 @@
33
"editor.rulers": [
44
120
55
],
6+
"editor.codeActionsOnSave": {
7+
"source.organizeImports": true
8+
},
9+
"files.trimTrailingWhitespace": true,
610
"files.exclude": {
11+
".coverage": true,
12+
".flake8": true,
13+
".gitattributes": true,
14+
".gitignore": true,
715
".idea": true,
816
".mypy_cache": true,
9-
"**/.pytest_cache": true,
17+
".mypy.ini": true,
18+
".pylintrc": true,
1019
"*.egg-info": true,
1120
"**/__pycache__": true,
12-
".coverage": true,
21+
"**/.pytest_cache": true,
1322
"coverage.xml": true,
1423
"htmlcov": true,
15-
"venv": true
24+
"venv": true
1625
},
1726
"python.testing.pytestArgs": [
1827
"tests"
1928
],
2029
"python.testing.pytestEnabled": true,
2130
"python.testing.unittestEnabled": false,
22-
"python.linting.enabled": true,
23-
"python.linting.flake8Enabled": true,
24-
"python.formatting.provider": "black",
25-
"python.formatting.blackArgs": [ "--line-length=120" ],
26-
"[python]": { "editor.formatOnSave": true },
27-
"[json]": { "editor.formatOnSave": true},
28-
"editor.codeActionsOnSave": { "source.organizeImports": true },
29-
"python.linting.mypyEnabled": true,
30-
"mypy.targets": [
31-
// Required by dmypy - otherwise mypy has an error in vscode
32-
"--follow-imports=normal",
33-
"."
34-
],
35-
"python.analysis.inlayHints.functionReturnTypes": true,
36-
"python.analysis.inlayHints.pytestParameters": true,
31+
"[python]": {
32+
"editor.defaultFormatter": "ms-python.black-formatter",
33+
"editor.formatOnSave": true
34+
},
3735
"python.analysis.diagnosticSeverityOverrides": {
3836
"reportPrivateUsage": "information",
3937
},
40-
"errorLens.enabledDiagnosticLevels": [
41-
"error",
42-
"warning"
38+
"black-formatter.args": [
39+
"--line-length=120"
4340
],
44-
"todo-tree.general.showActivityBarBadge": true,
45-
"todo-tree.general.statusBar": "current file"
41+
"[json]": {
42+
"editor.formatOnSave": true
43+
},
44+
"mypy.targets": [
45+
// Required by dmypy - otherwise mypy has an error in vscode
46+
"--follow-imports=normal",
47+
"."
48+
]
4649
}

ai_data_preprocessing_queue/Pipeline.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ def __init__(self, step_dict: Dict[str, Optional[str]]) -> None:
1010
processor = StepProcessor(step_name, step_dict.get(step_name))
1111
self.step_processors.append(processor)
1212

13-
def consume(self, item: Any, globalState: Optional[Dict[str, Any]] = None) -> Any:
14-
retVal = item
13+
def consume(self, item: Any, global_state: Optional[Dict[str, Any]] = None) -> Any:
14+
ret_val = item
1515

16-
itemState: Dict[str, Any] = {}
16+
item_state: Dict[str, Any] = {}
1717
for processor in self.step_processors:
18-
retVal = processor.run(retVal, itemState, globalState)
18+
ret_val = processor.run(ret_val, item_state, global_state)
1919

20-
return retVal
20+
return ret_val

ai_data_preprocessing_queue/StepProcessor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66

77

88
class StepProcessor:
9-
def __init__(self, name: str, stepData: Optional[str]) -> None:
9+
def __init__(self, name: str, step_data: Optional[str]) -> None:
1010
self.name: str = name
11-
self.stepData: Optional[str] = stepData
11+
self.step_data: Optional[str] = step_data
1212

1313
package_name = f"{__package__}.Steps"
1414
module_name = f".{self.name}"
1515
self.module = importlib.import_module(module_name, package_name)
1616

1717
assert self.module.step is not None
1818

19-
def run(self, item: Any, itemState: Dict[str, Any], globalState: Optional[Dict[str, Any]] = None) -> Any:
20-
return self.module.step(item, itemState, globalState, self.stepData or "")
19+
def run(self, item: Any, item_state: Dict[str, Any], global_state: Optional[Dict[str, Any]] = None) -> Any:
20+
return self.module.step(item, item_state, global_state, self.step_data or "")
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
1-
from typing import Any, Dict, Optional
2-
3-
from langdetect import detect
4-
51
"""
62
Detects one of the following languages and writes the language to local state
73
af, ar, bg, bn, ca, cs, cy, da, de, el, en, es, et, fa, fi, fr, gu, he,
84
hi, hr, hu, id, it, ja, kn, ko, lt, lv, mk, ml, mr, ne, nl, no, pa, pl,
95
pt, ro, ru, sk, sl, so, sq, sv, sw, ta, te, th, tl, tr, uk, ur, vi,
106
zh-cn, zh-tw
117
"""
8+
from typing import Any, Dict, Optional
9+
10+
from langdetect import detect
1211

1312

14-
def step(item: Any, itemState: Dict[str, Any], globalState: Optional[Dict[str, Any]], preprocessorData: str) -> Any:
15-
itemState["language"] = detect(item[:100])
13+
def step(item: Any, item_state: Dict[str, Any], global_state: Optional[Dict[str, Any]], preprocessor_data: str) -> Any:
14+
item_state["language"] = detect(item[:100])
1615
return item

ai_data_preprocessing_queue/Steps/regex_replacement.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
import pandas
66

77

8-
def step(item: Any, itemState: Dict[str, Any], globalState: Optional[Dict[str, Any]], preprocessorData: str) -> Any:
9-
if preprocessorData is None or not preprocessorData:
8+
def step(item: Any, item_state: Dict[str, Any], global_state: Optional[Dict[str, Any]], preprocessor_data: str) -> Any:
9+
if preprocessor_data is None or not preprocessor_data:
1010
return item
1111

12-
csv = _get_data_from_store_or_reload(globalState, preprocessorData)
12+
csv = _get_data_from_store_or_reload(global_state, preprocessor_data)
1313

1414
for _, row in csv.iterrows():
1515
pattern = re.compile(row[0])
@@ -18,21 +18,21 @@ def step(item: Any, itemState: Dict[str, Any], globalState: Optional[Dict[str, A
1818
return item
1919

2020

21-
def _get_data_from_store_or_reload(globalState: Optional[Dict[str, Any]], preprocessorData: str) -> pandas.DataFrame:
22-
if globalState is None:
23-
return _prepare_pre_processor_data(preprocessorData)
21+
def _get_data_from_store_or_reload(global_state: Optional[Dict[str, Any]], preprocessor_data: str) -> pandas.DataFrame:
22+
if global_state is None:
23+
return _prepare_pre_processor_data(preprocessor_data)
2424

25-
dictIdentifier = "regexReplacementPreprocessorData"
26-
if dictIdentifier in globalState:
27-
return globalState[dictIdentifier]
25+
dict_identifier = "regexReplacementPreprocessorData"
26+
if dict_identifier in global_state:
27+
return global_state[dict_identifier]
2828

29-
preparedData = _prepare_pre_processor_data(preprocessorData)
30-
globalState[dictIdentifier] = preparedData
31-
return preparedData
29+
prepared_data = _prepare_pre_processor_data(preprocessor_data)
30+
global_state[dict_identifier] = prepared_data
31+
return prepared_data
3232

3333

34-
def _prepare_pre_processor_data(preprocessorData: str) -> pandas.DataFrame:
35-
csv = pandas.read_csv(StringIO(preprocessorData), header=None, usecols=[0, 1, 2], quotechar='"', encoding="utf8")
34+
def _prepare_pre_processor_data(preprocessor_data: str) -> pandas.DataFrame:
35+
csv = pandas.read_csv(StringIO(preprocessor_data), header=None, usecols=[0, 1, 2], quotechar='"', encoding="utf8")
3636

3737
csv[0] = csv[0].str.strip()
3838
csv[1] = csv[1].str.strip()

ai_data_preprocessing_queue/Steps/remove_numbers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
from typing import Any, Dict, Optional
33

44

5-
def step(item: Any, itemState: Dict[str, Any], globalState: Optional[Dict[str, Any]], preprocessorData: str) -> Any:
5+
def step(item: Any, item_state: Dict[str, Any], global_state: Optional[Dict[str, Any]], preprocessor_data: str) -> Any:
66
item = re.sub(r"""\d""", " ", item)
77
return item

ai_data_preprocessing_queue/Steps/remove_punctuation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
from typing import Any, Dict, Optional
33

44

5-
def step(item: Any, itemState: Dict[str, Any], globalState: Optional[Dict[str, Any]], preprocessorData: str) -> Any:
5+
def step(item: Any, item_state: Dict[str, Any], global_state: Optional[Dict[str, Any]], preprocessor_data: str) -> Any:
66
item = re.sub(r"[^\w\s]", " ", item)
77
return item

0 commit comments

Comments
 (0)