Review all TODOs and FIXMEs #526

vmarkovtsev · 2019-01-10T12:00:32Z

Convert to issues as needed.

EgorBu · 2019-01-11T10:05:53Z

List of TODOs:

--
./lookout/style/format/analyzer.py-                "\n\t".join("%-55s %.5E" % (fe.feature_names[i], importances[i])
./lookout/style/format/analyzer.py-                            for i in numpy.argsort(-importances)[:25] if importances[i] > 1e-5))
./lookout/style/format/analyzer.py-            submit_event("%s.train.%s.rules" % (cls.name, language), len(trainable_rules.rules))
./lookout/style/format/analyzer.py:            # TODO(vmarkovtsev): save the achieved precision, recall, etc. to the model
./lookout/style/format/analyzer.py-            # throw away imprecise classes
./lookout/style/format/analyzer.py-            if trainable_rules.rules.rules:
./lookout/style/format/analyzer.py-                model[language] = trainable_rules.rules
--
./lookout/style/format/classes.py-"""Predicted class definitions."""
./lookout/style/format/classes.py:# TODO(zurk): refactor CLS related code into one class. Related issue:
./lookout/style/format/classes.py-# https://github.com/src-d/style-analyzer/issues/286
./lookout/style/format/classes.py-
./lookout/style/format/classes.py-CLS_SPACE = "<space>"
--
./lookout/style/format/benchmarks/evaluate_smoke.py-        else:
./lookout/style/format/benchmarks/evaluate_smoke.py-            detected_wrong_fix += 1
./lookout/style/format/benchmarks/evaluate_smoke.py-
./lookout/style/format/benchmarks/evaluate_smoke.py:    # TODO (zurk): Add proper class for benchmark metrics
./lookout/style/format/benchmarks/evaluate_smoke.py-    # https://github.com/src-d/style-analyzer/issues/333
./lookout/style/format/benchmarks/evaluate_smoke.py-    return misdetection, undetected, detected_wrong_fix, detected_correct_fix
./lookout/style/format/benchmarks/evaluate_smoke.py-
--
./lookout/style/format/benchmarks/evaluate_smoke.py-    :return: A dictionary with losses and predicted code for "global" and "local" indentation \
./lookout/style/format/benchmarks/evaluate_smoke.py-             strategies.
./lookout/style/format/benchmarks/evaluate_smoke.py-    """
./lookout/style/format/benchmarks/evaluate_smoke.py:    # TODO (zurk): make optional arguments non-optional.
./lookout/style/format/benchmarks/evaluate_smoke.py-    if (vnodes is None) ^ (fe is None):
./lookout/style/format/benchmarks/evaluate_smoke.py-        raise ValueError("vnodes and fe should be both None or not None.")
./lookout/style/format/benchmarks/evaluate_smoke.py-    losses = {}
--
./lookout/style/format/benchmarks/generate_smoke.py-        writer = csv.DictWriter(index_file, fieldnames=["repo", "style", "from", "to"])
./lookout/style/format/benchmarks/generate_smoke.py-        writer.writeheader()
./lookout/style/format/benchmarks/generate_smoke.py-        for style_name, (pattern, repl) in js_format_rules.items():
./lookout/style/format/benchmarks/generate_smoke.py:            # TODO(zurk): handle custom modification functions as well as regexp.
./lookout/style/format/benchmarks/generate_smoke.py-            pattern = re.compile(pattern)
./lookout/style/format/benchmarks/generate_smoke.py-            for repopath in repopaths:
./lookout/style/format/benchmarks/generate_smoke.py-                repo = Repo(str(repopath))
--
./lookout/style/format/benchmarks/general_report.py-        to_show = to_show[:max_files]
./lookout/style/format/benchmarks/general_report.py-
./lookout/style/format/benchmarks/general_report.py-    template = _load_jinja2_template("quality_report.md.jinja2")
./lookout/style/format/benchmarks/general_report.py:    # TODO(vmarkovtsev): move all the logic inside the template
./lookout/style/format/benchmarks/general_report.py-    res = template.render(language=language, ptr=ptr, conf_mat=mat, target_names=target_names,
./lookout/style/format/benchmarks/general_report.py-                          files=to_show, cl_report=c_sorted, cl_report_full=c_report_full,
./lookout/style/format/benchmarks/general_report.py-                          ppcr=ppcr)
--
./lookout/style/format/benchmarks/top_repos_quality.py-
./lookout/style/format/benchmarks/top_repos_quality.py-from lookout.style.format.benchmarks.general_report import QualityReportAnalyzer
./lookout/style/format/benchmarks/top_repos_quality.py-
./lookout/style/format/benchmarks/top_repos_quality.py:# TODO(zurk): Move REPOSITORIES to ./benchmarks/data/default_repository_list.csv
./lookout/style/format/benchmarks/top_repos_quality.py-# format: "repository to-commit from-commit"
./lookout/style/format/benchmarks/top_repos_quality.py-REPOSITORIES = [
./lookout/style/format/benchmarks/top_repos_quality.py-    "https://github.com/30-seconds/30-seconds-of-code 3a122c9cfcbdc091227879a06a32bc67ccd0d35d c8c60895e80b8bc90583502accdaa339b794609c",  # noqa: E501
--
./lookout/style/format/benchmarks/top_repos_quality.py-    "https://github.com/vuejs/vue-cli 2024f4e52097bed96b527d2e519dccba334f434b 85e4f9839ba88d1283b10bb3112582792b8dac6e",  # noqa: E501
./lookout/style/format/benchmarks/top_repos_quality.py-    "https://github.com/meteor/meteor c3309b123a7220ac24cbe73661184ee946bca01f 62fa9927ce34cff064cc3991439553e7c52b5258",  # noqa: E501
./lookout/style/format/benchmarks/top_repos_quality.py-    "https://github.com/webpack/webpack babe736cfa1ef7e8014ed32ba4a4ec38049dce14 3e74cb428af04eedac60ae13d2420d2b5bd3bde1",  # noqa: E501
./lookout/style/format/benchmarks/top_repos_quality.py:    # TODO: add after bblfsh python client v3 is released and we use it
./lookout/style/format/benchmarks/top_repos_quality.py-    # "https://github.com/vuejs/vue b7105ae8c9093e36ec89a470caa3b78bda3ef467 db1d0474997e9e60b8b0d39a3b7c2af55cfd0d4a",  # noqa: E501
./lookout/style/format/benchmarks/top_repos_quality.py-    # "https://github.com/vuejs/vuex 2e62705d4bce4ebcb8eca23df8c7b849125fc565 1ac16a95c574f6b1386016fb6d4f00cfd2ee1d60",  # noqa: E501
./lookout/style/format/benchmarks/top_repos_quality.py-    "https://github.com/segmentio/evergreen ba22d511dad83c072842e47801ef42697d142f7c 1030eca5da38dce4e5047c23a3ea7fc0c246b8ce",  # noqa: E501
--
./lookout/style/format/benchmarks/top_repos_quality.py-    if os.path.exists(repository):
./lookout/style/format/benchmarks/top_repos_quality.py-        return repository
./lookout/style/format/benchmarks/top_repos_quality.py-    # clone repository
./lookout/style/format/benchmarks/top_repos_quality.py:    # TODO: use dulwich in the future
./lookout/style/format/benchmarks/top_repos_quality.py-    git_dir = os.path.join(storage_dir, get_repo_name(repository))  # location for code
./lookout/style/format/benchmarks/top_repos_quality.py-    cmd = "git clone --single-branch --branch master %s %s" % (repository, git_dir)
./lookout/style/format/benchmarks/top_repos_quality.py-    subprocess.check_call(cmd.split())
--
./lookout/style/format/benchmarks/top_repos_quality.py-def calc_weighted_avg(arr: Sequence[Sequence], col: int, weight_col: int = 5) -> float:
./lookout/style/format/benchmarks/top_repos_quality.py-    """Calculate average value in `col` weighted by column `weight_col`."""
./lookout/style/format/benchmarks/top_repos_quality.py-    numerator, denominator = 0, 0
./lookout/style/format/benchmarks/top_repos_quality.py:    # TODO: switch to numpy arrays
./lookout/style/format/benchmarks/top_repos_quality.py-    for row in arr:
./lookout/style/format/benchmarks/top_repos_quality.py-        numerator += row[col] * row[weight_col]
./lookout/style/format/benchmarks/top_repos_quality.py-        denominator += row[weight_col]
--
./lookout/style/format/benchmarks/top_repos_quality.py-def calc_avg(arr: Sequence[Sequence], col: int) -> float:
./lookout/style/format/benchmarks/top_repos_quality.py-    """Calculate average value in `col`."""
./lookout/style/format/benchmarks/top_repos_quality.py-    numerator, denominator = 0, 0
./lookout/style/format/benchmarks/top_repos_quality.py:    # TODO: switch to numpy arrays
./lookout/style/format/benchmarks/top_repos_quality.py-    for row in arr:
./lookout/style/format/benchmarks/top_repos_quality.py-        numerator += row[col]
./lookout/style/format/benchmarks/top_repos_quality.py-        denominator += 1
--
./lookout/style/format/benchmarks/top_repos_quality.py-def _get_model_summary(report: str) -> (int, float):
./lookout/style/format/benchmarks/top_repos_quality.py-    """Extract model summary - number of rules and avg. len."""
./lookout/style/format/benchmarks/top_repos_quality.py-    data = _get_json_data(report)
./lookout/style/format/benchmarks/top_repos_quality.py:    # TODO(vmarkovtsev): address this embarrasing hardcode
./lookout/style/format/benchmarks/top_repos_quality.py-    return data["javascript"]["num_rules"], data["javascript"]["avg_rule_len"]
./lookout/style/format/benchmarks/top_repos_quality.py-
./lookout/style/format/benchmarks/top_repos_quality.py-

and FIXME

./lookout/style/format/benchmarks/general_report.py-        vnodes = chain.from_iterable(fix.file_vnodes for fix in fixes)
./lookout/style/format/benchmarks/general_report.py-        ys = numpy.hstack(fix.y for fix in fixes)
./lookout/style/format/benchmarks/general_report.py-        y_pred_pure = numpy.hstack(fix.y_pred_pure for fix in fixes)
./lookout/style/format/benchmarks/general_report.py:        # FIXME(vmarkovtsev): we are taking the first fix here which does not work for >1 language
./lookout/style/format/benchmarks/general_report.py-        return generate_quality_report(
./lookout/style/format/benchmarks/general_report.py-            fixes[0].language, self.model.ptr, vnodes, ys, y_pred_pure, self.config["max_files"],
./lookout/style/format/benchmarks/general_report.py-            fixes[0].feature_extractor.composite_class_representations)

cmd to collect it
grep -r --exclude=\*.{xml,js,coffee,md,map,ts,in,html,yml,npmignore,ipynb,go,proto} "FIXME" -A3 -B3 .
grep -r --exclude=\*.{xml,js,coffee,md,map,ts,in,html,yml,npmignore,ipynb,go,proto} "TODO" -A3 -B3 .

EgorBu · 2019-01-16T14:21:37Z

# ./lookout/style/format/analyzer.py:
                "\n\t".join("%-55s %.5E" % (fe.feature_names[i], importances[i])
                            for i in numpy.argsort(-importances)[:25] if importances[i] > 1e-5))
            submit_event("%s.train.%s.rules" % (cls.name, language), len(trainable_rules.rules))
            # TODO(vmarkovtsev): save the achieved precision, recall, etc. to the model
            # throw away imprecise classes
            if trainable_rules.rules.rules:
                model[language] = trainable_rules.rules

should be covered by #534

EgorBu · 2019-01-16T14:23:13Z

# ./lookout/style/format/classes.py-
"""Predicted class definitions."""
# TODO(zurk): refactor CLS related code into one class. Related issue:
# https://github.com/src-d/style-analyzer/issues/286

CLS_SPACE = "<space>"

related issue: #286

EgorBu · 2019-01-16T14:24:52Z

# ./lookout/style/format/benchmarks/evaluate_smoke.py- 
       else:
            detected_wrong_fix += 1

    # TODO (zurk): Add proper class for benchmark metrics
    # https://github.com/src-d/style-analyzer/issues/333
    return misdetection, undetected, detected_wrong_fix, detected_correct_fix

related issue: Rename benchmark metric names

EgorBu · 2019-01-16T14:30:34Z

#./lookout/style/format/benchmarks/evaluate_smoke.py-
    :return: A dictionary with losses and predicted code for "global" and "local" indentation \
             strategies.
    """
    # TODO (zurk): make optional arguments non-optional.
    if (vnodes is None) ^ (fe is None):
        raise ValueError("vnodes and fe should be both None or not None.")
    losses = {}

@zurk , could you explain what should be done here?

EgorBu · 2019-01-16T14:35:31Z

# ./lookout/style/format/benchmarks/generate_smoke.py-
        writer = csv.DictWriter(index_file, fieldnames=["repo", "style", "from", "to"])
        writer.writeheader()
        for style_name, (pattern, repl) in js_format_rules.items():
            # TODO(zurk): handle custom modification functions as well as regexp.
            pattern = re.compile(pattern)
            for repopath in repopaths:
                repo = Repo(str(repopath))

could be done like this

if isinstance(pattern, str):
    pattern = re.compile(pattern)
    pattern = lambda repl, code: re.sub(pattern, repl, code)
....
new_code = pattern(repl, code)
filepath.write_text(new_code)

related #535

EgorBu · 2019-01-16T14:37:07Z

#./lookout/style/format/benchmarks/general_report.py-
        to_show = to_show[:max_files]

    template = _load_jinja2_template("quality_report.md.jinja2")
    # TODO(vmarkovtsev): move all the logic inside the template
    res = template.render(language=language, ptr=ptr, conf_mat=mat, target_names=target_names,
                          files=to_show, cl_report=c_sorted, cl_report_full=c_report_full,
                          ppcr=ppcr)

it's possible to create dictionary and send it to render - is it your intention, @vmarkovtsev ?

EgorBu · 2019-01-16T14:42:11Z

#./lookout/style/format/benchmarks/top_repos_quality.py-
from lookout.style.format.benchmarks.general_report import QualityReportAnalyzer

# TODO(zurk): Move REPOSITORIES to ./benchmarks/data/default_repository_list.csv
# format: "repository to-commit from-commit"
REPOSITORIES = [
    "https://github.com/30-seconds/30-seconds-of-code 3a122c9cfcbdc091227879a06a32bc67ccd0d35d c8c60895e80b8bc90583502accdaa339b794609c",  # noqa: E501

related PR: Add input file for quality report

EgorBu · 2019-01-16T14:45:42Z

#./lookout/style/format/benchmarks/top_repos_quality.py-
    if os.path.exists(repository):
        return repository
    # clone repository
    # TODO: use dulwich in the future
    git_dir = os.path.join(storage_dir, get_repo_name(repository))  # location for code
    cmd = "git clone --single-branch --branch master %s %s" % (repository, git_dir)
    subprocess.check_call(cmd.split())

example how to do it and how it should look in the end

from dulwich import porcelain
...

porcelain.clone(repository, git_dir)

Related PR: #535

EgorBu · 2019-01-16T14:57:24Z

#./lookout/style/format/benchmarks/top_repos_quality.py-
def calc_weighted_avg(arr: Sequence[Sequence], col: int, weight_col: int = 5) -> float:
    """Calculate average value in `col` weighted by column `weight_col`."""
    numerator, denominator = 0, 0
    # TODO: switch to numpy arrays
    for row in arr:
        numerator += row[col] * row[weight_col]
        denominator += row[weight_col]

solution

arr = numpy.array(arr)
return (arr[:, col] * arr[:, weight_col]).sum() / arr[:, weight_col].sum()

Related PR: #535

EgorBu · 2019-01-16T14:59:13Z

#./lookout/style/format/benchmarks/top_repos_quality.py-
def calc_avg(arr: Sequence[Sequence], col: int) -> float:
    """Calculate average value in `col`."""
    numerator, denominator = 0, 0
    # TODO: switch to numpy arrays
    for row in arr:
        numerator += row[col]
        denominator += 1

solution

arr = numpy.array(arr)
return arr[:, col].sum() / arr.shape[0]

Related PR: #535

EgorBu · 2019-01-16T15:01:11Z

#./lookout/style/format/benchmarks/top_repos_quality.py-
def _get_model_summary(report: str) -> (int, float):
    """Extract model summary - number of rules and avg. len."""
    data = _get_json_data(report)
    # TODO(vmarkovtsev): address this embarrasing hardcode
    return data["javascript"]["num_rules"], data["javascript"]["avg_rule_len"]

Solution: iterate through available languages and return these parameters for each language

EgorBu · 2019-01-16T15:03:23Z

#./lookout/style/format/benchmarks/general_report.py-
        vnodes = chain.from_iterable(fix.file_vnodes for fix in fixes)
        ys = numpy.hstack(fix.y for fix in fixes)
        y_pred_pure = numpy.hstack(fix.y_pred_pure for fix in fixes)
        # FIXME(vmarkovtsev): we are taking the first fix here which does not work for >1 language
        return generate_quality_report(
            fixes[0].language, self.model.ptr, vnodes, ys, y_pred_pure, self.config["max_files"],
            fixes[0].feature_extractor.composite_class_representations)

Solution: generate quality report per language - requires refactoring of reports

src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

zurk · 2019-05-22T10:40:19Z

@vmarkovtsev let's close this one. This issue related to old refactoring phase. Some of the TODOs were solved, but we have a bunch of new TODOs already, but it is another story.

vmarkovtsev added the refactor label Jan 10, 2019

vmarkovtsev added this to the Refactoring January 2019 milestone Jan 10, 2019

vmarkovtsev assigned EgorBu Jan 10, 2019

vmarkovtsev added the small Small size label Jan 10, 2019

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 16, 2019

Complete TODO: handle custom modification functions as well as regexp

065e2c3

src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 16, 2019

Complete two TODOs:

2d4b559

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

EgorBu mentioned this issue Jan 16, 2019

Complete several TODOs #535

Merged

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 16, 2019

Complete TODO: handle custom modification functions as well as regexp

bc33690

src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 16, 2019

Complete two TODOs:

79b9825

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 17, 2019

Complete TODO: handle custom modification functions as well as regexp

7fa90c7

src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 17, 2019

Complete two TODOs:

5ac5bbe

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

EgorBu pushed a commit to EgorBu/style-analyzer that referenced this issue Jan 18, 2019

Complete two TODOs:

532111c

* use dulwich in the future src-d#526 (comment) * switch to numpy arrays src-d#526 (comment) src-d#526 (comment) Signed-off-by: egor <egor@sourced.tech>

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Review all TODOs and FIXMEs #526

Review all TODOs and FIXMEs #526

vmarkovtsev commented Jan 10, 2019

EgorBu commented Jan 11, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019

EgorBu commented Jan 16, 2019

zurk commented May 22, 2019

Review all TODOs and FIXMEs #526

Review all TODOs and FIXMEs #526

Comments

vmarkovtsev commented Jan 10, 2019

EgorBu commented Jan 11, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019 • edited Loading

EgorBu commented Jan 16, 2019

EgorBu commented Jan 16, 2019

zurk commented May 22, 2019

EgorBu commented Jan 11, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading

EgorBu commented Jan 16, 2019 •

edited

Loading