Skip to content

Commit

Permalink
Merge pull request #1 from deepsafe/init
Browse files Browse the repository at this point in the history
init package
  • Loading branch information
fcakyon authored Jan 4, 2023
2 parents 2d85602 + fd89a50 commit 7e7d70f
Show file tree
Hide file tree
Showing 12 changed files with 685 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/publish_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Workflow that builds the Python package and uploads it to PyPI.
name: Publish Python Package

# Runs on GitHub release events of type "published" or "edited".
on:
release:
types: [published, edited]

jobs:
publish:
runs-on: ubuntu-latest

steps:
# Check out the repository and install a Python 3 interpreter.
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
# Install the packaging tools used by the build/upload step below.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
# Build an sdist and a wheel, then upload everything in dist/ with twine.
# twine reads its credentials from TWINE_USERNAME / TWINE_PASSWORD; the
# password comes from the PYPI_API_TOKEN repository secret.
# --skip-existing keeps the upload going when a file is already on the index.
- name: Build and publish
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
python setup.py sdist bdist_wheel
twine upload --verbose --skip-existing dist/*
14 changes: 14 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Include the README
include *.md

# Include the license file
include LICENSE

# Include setup.py
include setup.py

# Include requirements.txt
include requirements.txt

# Include the data files
recursive-include safetext/languages *
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,24 @@
# safetext
Rule-based profanity checking tool for English and Turkish.

### installation

```bash
pip install safetext
```

### usage

```python
from safetext import SafeText

st = SafeText(language='en')

results = st.check_profanity(text='Some text with <profanity-word>.')
>> results
>> [{'word': '<profanity-word>', 'index': 4, 'start': 15, 'end': 31}]

text = st.censor_profanity(text='Some text with <profanity-word>.')
>> text
>> "Some text with ***."
```
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
zemberek-python>=0.2.3, <0.3.0
30 changes: 30 additions & 0 deletions safetext/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from .languages.en import EnglishProfanityChecker
from .languages.tr import TurkishProfanityChecker

__version__ = "0.0.1"


class SafeText:
    """Language-aware front end for the profanity checkers.

    An instance holds the checker for one language at a time; use
    ``set_language`` to switch.

    Args:
        language: Language code of the checker to use. ``"en"`` and ``"tr"``
            are supported.

    Raises:
        ValueError: If ``language`` is not supported.
    """

    def __init__(self, language="en"):
        self.language = language
        self.checker = None
        self.set_language(language)

    def set_language(self, language):
        """Switch to the checker for ``language``.

        The instance is only modified once the language has been accepted, so
        a rejected language leaves the previous language/checker pair intact
        instead of leaving ``self.language`` out of sync with ``self.checker``.

        Raises:
            ValueError: If ``language`` is not supported.
        """
        if language == "en":
            checker = EnglishProfanityChecker()
        elif language == "tr":
            checker = TurkishProfanityChecker()
        else:
            raise ValueError("Language not supported")

        # Commit both attributes together, after validation succeeded.
        self.language = language
        self.checker = checker

    def check_profanity(self, text):
        """Return whatever the active checker's ``check`` reports for ``text``.

        Raises:
            ValueError: If no checker has been set.
        """
        if self.checker is None:
            raise ValueError("Language not set")
        return self.checker.check(text)

    def censor_profanity(self, text):
        """Return ``text`` as censored by the active checker's ``censor``.

        Raises:
            ValueError: If no checker has been set.
        """
        if self.checker is None:
            raise ValueError("Language not set")
        return self.checker.censor(text)
Empty file added safetext/languages/__init__.py
Empty file.
69 changes: 69 additions & 0 deletions safetext/languages/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
class BaseProfanityChecker:
    """Base class for word-list based profanity checkers.

    The word list is read lazily from ``<this package>/<language>/words.txt``
    (one entry per line) the first time ``profanity_words`` is accessed.

    NOTE: matching is done per whitespace-separated token, so word-list
    entries that contain spaces are never matched by ``check``.

    Args:
        language: Language code; also the name of the sub-directory that holds
            the ``words.txt`` file.
    """

    def __init__(self, language):
        self.language = language

    @property
    def words_filepath(self):
        """Return the path of the word-list file for this language as a string."""
        import pathlib

        package_dir = pathlib.Path(__file__).parent.resolve()
        return str(package_dir / self.language / "words.txt")

    @property
    def profanity_words(self):
        """Return the list of profanity words, reading the file on first access."""
        if not hasattr(self, "_profanity_words"):
            self._profanity_words = self._read_words(self.words_filepath)

        return self._profanity_words

    def _check(self, text):
        """Find profanity words in ``text``.

        The text is split on whitespace. A token matches when its lowercase
        form is in the word list; if the raw token does not match, it is tried
        again with surrounding ASCII punctuation removed (so ``"word."`` or
        ``"(word)"`` is still detected).

        Returns:
            A list of dicts, one per detection, in order of appearance:
            ``word`` (the matched text as written), ``index`` (1-based token
            position), ``start`` and ``end`` (character offsets into ``text``
            such that ``text[start:end] == word``).
        """
        import string

        # Set membership is O(1); the word list itself stays a list.
        known_words = set(self.profanity_words)
        profanity_infos = []

        cursor = 0
        for position, token in enumerate(text.split(), start=1):
            # Locate the token in the original text instead of assuming one
            # space between tokens; leading or repeated whitespace would
            # otherwise shift every offset.
            token_start = text.index(token, cursor)
            cursor = token_start + len(token)

            word = token
            leading = 0
            if word.lower() not in known_words:
                # Retry without surrounding punctuation.
                without_leading = token.lstrip(string.punctuation)
                leading = len(token) - len(without_leading)
                word = without_leading.rstrip(string.punctuation)
                if not word or word.lower() not in known_words:
                    continue

            start_index = token_start + leading
            profanity_infos.append(
                {
                    "word": word,
                    "index": position,
                    "start": start_index,
                    "end": start_index + len(word),
                }
            )

        return profanity_infos

    def _read_words(self, filepath):
        """Read the word list from ``filepath``.

        The file is decoded as UTF-8 regardless of the platform default so
        that non-ASCII entries load identically everywhere. Surrounding
        whitespace is stripped and blank lines are skipped.

        Returns:
            A list with one entry per non-blank line.
        """
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.read().splitlines()

        return [line.strip() for line in lines if line.strip()]

    def _preprocess(self, text):
        """Preprocess the text before checking; the base version returns it unchanged."""
        return text

    def check(self, text):
        """Preprocess ``text`` and return the detected profanities (see ``_check``)."""
        return self._check(self._preprocess(text))

    def censor(self, text):
        """Return ``text`` with every detected profanity replaced by ``***``.

        Only the detected spans are replaced. The result is assembled from
        slices of the original string, so one replacement cannot shift the
        offsets of the next and identical substrings inside other words are
        left untouched.

        The offsets come from ``check`` and therefore refer to the
        preprocessed text; they line up with ``text`` as long as
        ``_preprocess`` keeps character positions (the base version does).
        """
        detected_profanities = self.check(text)

        pieces = []
        cursor = 0
        for profanity in sorted(detected_profanities, key=lambda p: p["start"]):
            start_index = profanity["start"]
            end_index = profanity["end"]
            if start_index < cursor:
                # Overlaps a span that was already censored.
                continue
            pieces.append(text[cursor:start_index])
            pieces.append("***")
            cursor = end_index
        pieces.append(text[cursor:])

        return "".join(pieces)
8 changes: 8 additions & 0 deletions safetext/languages/en/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from safetext.languages.base import BaseProfanityChecker


class EnglishProfanityChecker(BaseProfanityChecker):
    """Profanity checker for English (language code ``en``)."""

    def __init__(self):
        # The base class only needs the language code.
        super().__init__("en")
74 changes: 74 additions & 0 deletions safetext/languages/en/words.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
anal
anus
arse
ass
ass fuck
ass hole
assfucker
asshole
assshole
bastard
bitch
black cock
bloody hell
boong
cock
cockfucker
cocksuck
cocksucker
coon
coonnass
crap
cunt
cyberfuck
damn
darn
dick
dirty
douche
dummy
erect
erection
erotic
escort
fag
faggot
fuck
Fuck off
fuck you
fuckass
fuckhole
god damn
gook
hard core
hardcore
homoerotic
hore
lesbian
lesbians
mother fucker
motherfuck
motherfucker
negro
nigger
orgasim
orgasm
penis
penisfucker
piss
piss off
porn
porno
pornography
pussy
retard
sadist
sex
sexy
shit
slut
son of a bitch
suck
tits
viagra
whore
8 changes: 8 additions & 0 deletions safetext/languages/tr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from safetext.languages.base import BaseProfanityChecker


class TurkishProfanityChecker(BaseProfanityChecker):
    """Profanity checker for Turkish (language code ``tr``)."""

    def __init__(self):
        # The base class only needs the language code.
        super().__init__("tr")
Loading

0 comments on commit 7e7d70f

Please sign in to comment.