Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Java/JDK 21 fix for regex (incomplete) #9854

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.util.*;
import java.util.regex.Pattern;

import static java.util.regex.Pattern.UNICODE_CHARACTER_CLASS;

public class PatternRuleHandler extends XMLRuleHandler {

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.languagetool.tokenizers.ca;

import org.junit.Ignore;
import org.junit.Test;
import org.languagetool.TestTools;
import org.languagetool.language.Catalan;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public UnitConversionRule(ResourceBundle messages) {

addUnit("Meilen?", MILE, "Meile", 1, false);
addUnit("Yard", YARD, "Yard", 1, false);
addUnit("Fuß", FEET, "Fuß", 1, false);
addUnit("(?U)Fuß", FEET, "Fuß", 1, false);
addUnit("Zoll", INCH, "Zoll", 1, false);

addUnit("(Kilometer pro Stunde|Stundenkilometer)", KILOMETRE_PER_HOUR, "Kilometer pro Stunde", 1, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15085,7 +15085,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example correction="Gros">Für das <marker>Groß</marker> der Studierenden sind die Bedingungen schlecht.</example>
</rule>
<rule>
<regexp case_sensitive='yes'>\ben gro(ß|ss)\b</regexp>
<regexp case_sensitive='yes'>(?U)\ben gro(ß|ss)\b</regexp>
<message>Meinten Sie <suggestion>en gros</suggestion>?</message>
<url>https://www.duden.de/rechtschreibung/en_gros</url>
<short>&verw;</short>
Expand Down Expand Up @@ -75616,7 +75616,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<rulegroup id="ANS_OHNE_APOSTROPH" name="Kein Apostroph bei 'ans', 'ins', 'aufs' etc.">
<url>https://languagetool.org/insights/de/beitrag/grammatik-deppenapostroph/</url>
<rule>
<regexp>\b(an|in|für|auf|durch|hinter|über|um|unter|vor)['’´`‘]s</regexp>
<regexp>(?U)\b(an|in|für|auf|durch|hinter|über|um|unter|vor)['’´`‘]s</regexp>
<message>Das Wort <suggestion>\1s</suggestion> (\1 das) wird ohne Apostroph geschrieben.</message>
<short>'Ans', 'ins' usw. werden ohne Apostroph geschrieben.</short>
<example>Ich ging <marker>ans</marker> Fenster.</example>
Expand All @@ -75627,7 +75627,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example correction="hinters">Er wurde <marker>hinter's</marker> Licht geführt!</example>
</rule>
<rule>
<regexp>\b(hinter|über|unter)['’´`‘](n|m)</regexp>
<regexp>(?U)\b(hinter|über|unter)['’´`‘](n|m)</regexp>
<message>Das umgangssprachliche Wort <suggestion>\1\2</suggestion> wird ohne Apostroph geschrieben.</message>
<short>'übern', 'untern' usw. werden ohne Apostroph geschrieben.</short>
<example correction="übern">Ich bin <marker>über'n</marker> Berg.</example>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5354,7 +5354,7 @@ USA
<example>Das sind die <marker>Klausuren</marker>.</example>
</rule>
<rule>
<regexp case_sensitive='yes'>\b(üblichen?) (Gepflogenheit(en)?)\b</regexp>
<regexp case_sensitive='yes'>(?U)\b(üblichen?) (Gepflogenheit(en)?)\b</regexp>
<message>Mögliche Redundanz: Die Bedeutung des Nomens '\2' beinhaltet bereits das durch das Adjektiv '\1' ausgedrückte Merkmal.</message>
<suggestion>\2</suggestion>
<short>&redundanz;</short>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
import org.languagetool.rules.AbstractMakeContractionsFilter;

public class MakeContractionsFilter extends AbstractMakeContractionsFilter {

private static final Pattern DE_LE = Pattern.compile("\\bde le\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
private static final Pattern A_LE = Pattern.compile("\\bà le\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
private static final Pattern DE_LES = Pattern.compile("\\bde les\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
private static final Pattern A_LES = Pattern.compile("\\bà les\\b", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

private static final int FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS;
private static final Pattern DE_LE = Pattern.compile("\\bde le\\b", FLAGS);
private static final Pattern A_LE = Pattern.compile("\\bà le\\b", FLAGS);
private static final Pattern DE_LES = Pattern.compile("\\bde les\\b", FLAGS);
private static final Pattern A_LES = Pattern.compile("\\bà les\\b", FLAGS);

protected String fixContractions(String suggestion) {
Matcher matcher = DE_LE.matcher(suggestion);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45429,7 +45429,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<rulegroup id="BANC_BAN" name="banc et ban">
<url>http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?id=4466</url>
<rule>
<regexp mark="1">\bau (banc) (?:des nations|de la (?:société|ville|communauté|France)|de l['´‘’′](?:Europe|empire|église|islam))\b</regexp>
<regexp mark="1">(?U)\bau (banc) (?:des nations|de la (?:société|ville|communauté|France)|de l['´‘’′](?:Europe|empire|église|islam))\b</regexp>
<message>Confusion probable entre « banc » et <suggestion>ban</suggestion>.</message>
<example correction="ban">Il a été mis au <marker>banc</marker> de la société.</example>
<example correction="ban">Il a été mis au <marker>banc</marker> de l’empire.</example>
Expand Down Expand Up @@ -120744,7 +120744,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example>Il habite en Franche-Comté.</example>
</rule>
<rule>
<regexp>\b(?-i)(?![ÎI]le-de-France\b)(?i)[IÎ]ll?es?[-‑‐  ]de[-‑‐  ]France\b</regexp>
<regexp>(?U)\b(?-i)(?![ÎI]le-de-France\b)(?i)[IÎ]ll?es?[-‑‐  ]de[-‑‐  ]France\b</regexp>
<message>Cette région s'écrit avec un trait d'union.</message>
<suggestion>Île-de-France</suggestion>
<suggestion>Ile-de-France</suggestion>
Expand Down Expand Up @@ -120777,14 +120777,14 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example correction="Vitry-sur-Seine">Il habite à <marker>Vitry sur Seine</marker>.</example>
</rule>
<rule>
<regexp>\b(?-i)(?!Asnières-sur-Seine\b)(?i)As?nn?i[èe]rr?es?[-‑‐  ]sur[-‑‐  ]Sei?nes?\b</regexp>
<regexp>(?U)\b(?-i)(?!Asnières-sur-Seine\b)(?i)As?nn?i[èe]rr?es?[-‑‐  ]sur[-‑‐  ]Sei?nes?\b</regexp>
<message>Cette ville s'écrit avec des traits d'union.</message>
<suggestion>Asnières-sur-Seine</suggestion>
<url>https://fr.wikipedia.org/wiki/Asnières-sur-Seine</url>
<example correction="Asnières-sur-Seine">Il habite à <marker>Asnières sur Seine</marker>.</example>
</rule>
<rule>
<regexp>\b(?-i)(?!Épinay-sur-Seine\b)(?i)[ÉE]pp?inn?[ae][yi]s?[-‑‐  ]sur[-‑‐  ]Sei?nes?\b</regexp>
<regexp>(?U)\b(?-i)(?!Épinay-sur-Seine\b)(?i)[ÉE]pp?inn?[ae][yi]s?[-‑‐  ]sur[-‑‐  ]Sei?nes?\b</regexp>
<message>Cette ville s'écrit avec des traits d'union.</message>
<suggestion>Épinay-sur-Seine</suggestion>
<url>https://fr.wikipedia.org/wiki/Épinay-sur-Seine</url>
Expand Down Expand Up @@ -121720,7 +121720,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<rulegroup id="XXieme" name="XXième">
<url>http://www.academie-francaise.fr/abreviations-des-adjectifs-numeraux</url>
<rule>
<regexp case_sensitive="yes" mark="1">\b(([IVXLC]+|\d+)[-‑‐]?(i?[eèé]mes?|è))\b</regexp>
<regexp case_sensitive="yes" mark="1">(?U)\b(([IVXLC]+|\d+)[-‑‐]?(i?[eèé]mes?|è))\b</regexp>
<message>Faute de typographie.</message>
<suggestion><match no="1" regexp_match="(.*?)[-‑‐]?(i?[eèéê]mes?|è)" regexp_replace="$1ᵉ"/></suggestion>
<example correction="XIXᵉ">Un écrivain du <marker>XIX-ième</marker> siècle</example>
Expand All @@ -121730,7 +121730,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example>La 2ᵉ à droite</example>
</rule>
<rule>
<regexp case_sensitive="yes" mark="1">\b(([IVXLC]+|\d+)[-‑‐]?(i?[eèé]mes?|è))\b</regexp>
<regexp case_sensitive="yes" mark="1">(?U)\b(([IVXLC]+|\d+)[-‑‐]?(i?[eèé]mes?|è))\b</regexp>
<message>Faute de typographie.</message>
<suggestion><match no="1" regexp_match="(.*?)[-‑‐]?(i?[eèéê]mes|è)" regexp_replace="$1ᵉˢ"/></suggestion>
<example>Les 2ᵉˢ du tournoi</example>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,49 +48,49 @@ public PortugueseUnitConversionRule(ResourceBundle messages) {
addUnit("grama", KILOGRAM, "gramas", 1e-3, true);
addUnit("toneladas?", KILOGRAM, "toneladas", 1e3, true);
addUnit("libras?", POUND, "libras", 1, false);
addUnit("onças?", OUNCE, "onças", 1, false);
addUnit("(?U)onças?", OUNCE, "onças", 1, false);

addUnit("milhas?", MILE, "milhas", 1, false);
addUnit("jardas?", YARD, "jardas", 1, false);
addUnit("pés?", FEET, "pés", 1, false);
addUnit("(?U)pés?", FEET, "pés", 1, false);
addUnit("polegadas?", INCH, "polegadas", 1, false);

addUnit("(qu|k)ilômetros? por hora", KILOMETRE_PER_HOUR, "quilômetros por hora", 1, true);
addUnit("(?U)(qu|k)ilômetros? por hora", KILOMETRE_PER_HOUR, "quilômetros por hora", 1, true);
addUnit("milhas? por hora", MILE.divide(HOUR), "milhas por hora", 1, false);

addUnit("metros?", METRE, "metros", 1, true);
addUnit("(qu|k)ilômetros?", METRE, "quilômetros", 1e3, true);
addUnit("decímetros?", METRE, "decímetros", 1e-1, false); // metric, but should not be suggested
addUnit("centímetros?", METRE, "centímetros", 1e-2, true);
addUnit("milímetros?", METRE, "milímetros", 1e-3, true);
addUnit("micrômetros?", METRE, "micrômetros", 1e-6, true);
addUnit("nanômetros?", METRE, "nanômetros", 1e-9, true);
addUnit("picômetros?", METRE, "picômetros", 1e-12, true);
addUnit("fentômetros?", METRE, "fentômetros", 1e-15, true);
addUnit("(?U)(qu|k)ilômetros?", METRE, "quilômetros", 1e3, true);
addUnit("(?U)decímetros?", METRE, "decímetros", 1e-1, false); // metric, but should not be suggested
addUnit("(?U)centímetros?", METRE, "centímetros", 1e-2, true);
addUnit("(?U)milímetros?", METRE, "milímetros", 1e-3, true);
addUnit("(?U)micrômetros?", METRE, "micrômetros", 1e-6, true);
addUnit("(?U)nanômetros?", METRE, "nanômetros", 1e-9, true);
addUnit("(?U)picômetros?", METRE, "picômetros", 1e-12, true);
addUnit("(?U)fentômetros?", METRE, "fentômetros", 1e-15, true);

addUnit("metros? quadrados?", SQUARE_METRE, "metros quadrados", 1, true);
addUnit("hectar(es)?", SQUARE_METRE, "hectares", 1e4, true);
addUnit("ares?", SQUARE_METRE, "ares", 1e2, true);
addUnit("(k|qui)ilômetros? quadrados?", SQUARE_METRE, "quilômetros quadrados", 1e6, true);
addUnit("decímetros? quadrados?", SQUARE_METRE, "decímetros quadrados", 1e-2, false/*true*/); // Metric, but not commonly used
addUnit("centímetros? quadrados?", SQUARE_METRE, "centímetros quadrados", 1e-4, true);
addUnit("milímetros? quadrados?", SQUARE_METRE, "milímetros quadrados", 1e-6, true);
addUnit("micrômetros? quadrados?", SQUARE_METRE, "micrômetros quadrados", 1e-12, true);
addUnit("nanômetros? quadrados?", SQUARE_METRE, "nanômetros quadrados", 1e-18, true);

addUnit("metros? cúbicos?", CUBIC_METRE,"metros cúbicos", 1, true);
addUnit("(k|qu)ilômetros? cúbicos?", CUBIC_METRE, "quilômetros cúbicos", 1e9, true);
addUnit("decímetros? cúbicos?", CUBIC_METRE, "decímetros cúbicos", 1e-3, false/*true*/); // Metric, but not commonly used
addUnit("centímetros? cúbicos?", CUBIC_METRE,"centímetros cúbicos", 1e-6, true);
addUnit("milímetros? cúbicos?", CUBIC_METRE,"milímetros cúbicos", 1e-9, true);
addUnit("micrômetros? cúbicos?", CUBIC_METRE,"micrômetros cúbicos", 1e-18, true);
addUnit("nanômetros? cúbicos?", CUBIC_METRE, "nanômetros cúbicos", 1e-27, true);
addUnit("(?U)(k|qui)ilômetros? quadrados?", SQUARE_METRE, "quilômetros quadrados", 1e6, true);
addUnit("(?U)decímetros? quadrados?", SQUARE_METRE, "decímetros quadrados", 1e-2, false/*true*/); // Metric, but not commonly used
addUnit("(?U)centímetros? quadrados?", SQUARE_METRE, "centímetros quadrados", 1e-4, true);
addUnit("(?U)milímetros? quadrados?", SQUARE_METRE, "milímetros quadrados", 1e-6, true);
addUnit("(?U)micrômetros? quadrados?", SQUARE_METRE, "micrômetros quadrados", 1e-12, true);
addUnit("(?U)nanômetros? quadrados?", SQUARE_METRE, "nanômetros quadrados", 1e-18, true);

addUnit("(?U)metros? cúbicos?", CUBIC_METRE,"metros cúbicos", 1, true);
addUnit("(?U)(k|qu)ilômetros? cúbicos?", CUBIC_METRE, "quilômetros cúbicos", 1e9, true);
addUnit("(?U)decímetros? cúbicos?", CUBIC_METRE, "decímetros cúbicos", 1e-3, false/*true*/); // Metric, but not commonly used
addUnit("(?U)centímetros? cúbicos?", CUBIC_METRE,"centímetros cúbicos", 1e-6, true);
addUnit("(?U)milímetros? cúbicos?", CUBIC_METRE,"milímetros cúbicos", 1e-9, true);
addUnit("(?U)micrômetros? cúbicos?", CUBIC_METRE,"micrômetros cúbicos", 1e-18, true);
addUnit("(?U)nanômetros? cúbicos?", CUBIC_METRE, "nanômetros cúbicos", 1e-27, true);

addUnit("litros?", LITRE, "litros", 1, true);
addUnit("mililitros?", LITRE, "mililitros", 1e-3, true);

addUnit( "(?:Graus)? Fahrenheit", FAHRENHEIT, "graus Fahrenheit", 1, false);
addUnit( "(?:Graus)? (Celsi[ou]s|[cC]entígrados?)", CELSIUS, "graus Celsius", 1, true);
addUnit("(?:Graus)? Fahrenheit", FAHRENHEIT, "graus Fahrenheit", 1, false);
addUnit("(?U)(?:Graus)? (Celsi[ou]s|[cC]entígrados?)", CELSIUS, "graus Celsius", 1, true);
}

@Override
Expand Down