From 2c6637295087c208080edece25e4fe81bbe1efd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=EC=B0=ACB?= Date: Mon, 1 Jan 2024 17:59:48 +0900 Subject: [PATCH] =?UTF-8?q?sasrec=20=EC=A0=84=EC=B2=98=EB=A6=AC=20?= =?UTF-8?q?=EC=A4=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + SASRec/daum_train_dir/log.txt | 94 - SASRec/test.ipynb | 5163 +-------------------------------- 3 files changed, 3 insertions(+), 5256 deletions(-) diff --git a/.gitignore b/.gitignore index 61a70ea..9722942 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ venv/ */.ipynb_checkpoints .env + +*.pth diff --git a/SASRec/daum_train_dir/log.txt b/SASRec/daum_train_dir/log.txt index 930743e..e69de29 100644 --- a/SASRec/daum_train_dir/log.txt +++ b/SASRec/daum_train_dir/log.txt @@ -1,94 +0,0 @@ -(0.044373985099689615, 0.09300769488123119) (0.0436886654152135, 0.09611520428667113) -(0.045958140814474274, 0.10003368137420007) (0.04205997193413812, 0.09044972208185953) -(0.044356783572856405, 0.0970873786407767) (0.044191534035070534, 0.09572536722231391) -(0.04536698385636212, 0.09559811827956989) (0.041851336699796224, 0.09435227082285906) -(0.04472889149772005, 0.0956288728856138) (0.04328474266361943, 0.09551034134857911) -(0.048239726844116405, 0.09866666666666667) (0.04731443834971107, 0.10084591142809753) -(0.04444361392809496, 0.09430156328794756) (0.04332403218243672, 0.09416818106723938) -(0.04498127493582461, 0.09726848767488341) (0.04525999418504957, 0.10093582887700535) -(0.04632685236601284, 0.0988681757656458) (0.042796153361988165, 0.09431438127090301) -(0.04468345576295705, 0.09794497845541929) (0.04491325008922685, 0.09681188449340677) -(0.046945052656717834, 0.09722222222222222) (0.04322472167728918, 0.09549817941079113) -(0.047108773451795455, 0.1019594818997011) (0.04729820852430967, 0.10666446171655367) -(0.04439505611152801, 0.09386523633925578) (0.04408586029639218, 0.0959680106631123) -(0.044713080324515236, 0.09448425262456257) (0.043471028547191266, 0.09427048634243837) -(0.04542152430980709, 0.09690893901420217) (0.04214566700896336, 0.09306599832915623) -(0.04609101754429701, 0.09804571050016561) (0.03956568400189815, 0.08898516913847693) -(0.04333023959747806, 0.0940959409594096) (0.042723565048111825, 0.0953171703752906) -(0.044794015836273744, 0.09756909756909757) (0.04618120097960042, 0.09858215179316097) -(0.043662183688253316, 0.09410979476055398) (0.043360224251974784, 0.09529372496662217) -(0.04498876556460492, 0.09571619812583668) (0.04272734669658938, 0.09306963603124481) -(0.04202379984043614, 0.09075797872340426) (0.04311992508752632, 0.0947403910991234) -(0.045268512141080894, 0.09768766756032171) (0.0468607209114692, 0.10271802567950641) -(0.04359844583233627, 0.09207459207459208) (0.04612020241453335, 0.09836065573770492) -(0.04608093283742416, 0.09939859672569329) (0.04607346748579972, 0.09966386554621849) -(0.04526191445994982, 0.0961344537815126) (0.04325749151363178, 0.09676879290138958) -(0.04321233420344863, 0.09391622340425532) (0.042122987863489766, 0.09336238685886691) -(0.04515464679552393, 0.09449859573765075) (0.0440012218333394, 0.09720394736842106) -(0.046830484903936265, 0.09821728893373696) (0.04373298824276087, 0.0968651517664621) -(0.04497007068711003, 0.09635761589403974) (0.04416763488411565, 0.096169928081619) -(0.04654062510085091, 0.09937888198757763) (0.0438247729395723, 0.09775694676933377) -(0.04425386183798519, 0.09374478732276897) (0.045890680325843, 0.09981669721713048) -(0.04753692353344556, 0.10040774719673802) (0.04764233375044716, 0.10197258441992645) -(0.044033521210461134, 0.09535780124015418) (0.04722631472039476, 0.10424710424710425) -(0.045047601234450986, 0.09595624792840571) (0.042727357430090054, 0.09519054751206524) -(0.04560667788898872, 0.10005027652086476) (0.044965005745442904, 0.09879235155987924) -(0.04317580382931966, 0.09181471027727046) (0.04356802299458313, 0.09647216184584517) -(0.045552122060880905, 0.09863832613749585) (0.04359571461945084, 0.09581646423751687) -(0.04433883683392936, 0.09640145262462858) (0.04604111340064984, 0.10028413839211098) -(0.042727681195515996, 0.09155285786545271) (0.04187160103789263, 0.09353970390309556) -(0.042262878514634006, 0.09264853977844914) (0.043341495840876695, 0.09636941609503095) -(0.048414427820394935, 0.10634359750967524) (0.04350931952536367, 0.09576612903225806) -(0.04671164198918663, 0.1009695753928452) (0.04202857359608118, 0.09141135107985937) -(0.041498395380641026, 0.09093965372331485) (0.045108944375966256, 0.09820379385596777) -(0.047318505487230225, 0.09931219594027847) (0.04436594803821674, 0.09668785547005687) -(0.04695970548119968, 0.09842061512884455) (0.043601732380282754, 0.09605662285136501) -(0.0449643100808555, 0.0961730449251248) (0.04102761974235967, 0.0897796918999503) -(0.04410275170908447, 0.09142006943296413) (0.044811550756412646, 0.09560067681895093) -(0.04609206346898147, 0.09764253469319512) (0.04118232470974501, 0.0934203917629332) -(0.04421181471998444, 0.09468244707451241) (0.04208845172760542, 0.09323736190157349) -(0.04822891734139953, 0.09991603694374475) (0.045479527331409, 0.0999172870140612) -(0.044924124182479167, 0.09650443217929419) (0.04404108099165882, 0.09601600266711119) -(0.04716903475909976, 0.09900823667843335) (0.04326355498307944, 0.0970729287249876) -(0.04651980500422951, 0.09908409658617819) (0.045423030683618325, 0.10120768838237795) -(0.045979672194898805, 0.09620253164556962) (0.0410301792563652, 0.09124704291990537) -(0.04467948579279559, 0.09312823942484534) (0.04557738629934526, 0.09626653273062113) -(0.04566795643220165, 0.0969961402919953) (0.0443693817307125, 0.09696359671196109) -(0.04670630859570594, 0.09910366987992558) (0.044419329958988354, 0.09523020867837032) -(0.04457919923969163, 0.09336650082918739) (0.04433661357939026, 0.09914443885254152) -(0.04410684240367229, 0.0961250623648761) (0.04353721992146174, 0.0949748743718593) -(0.04446339230131681, 0.09572335449381891) (0.0466181967120369, 0.0995995995995996) -(0.04699288712446491, 0.09991617770326906) (0.04399123131071464, 0.09655172413793103) -(0.044141877681033906, 0.09600935047587243) (0.04360320569806565, 0.09500506243671954) -(0.04853040622486531, 0.10468227424749164) (0.04449082958047818, 0.09770788020746193) -(0.04648920916138218, 0.09846717760746418) (0.04377790334516103, 0.09737454303755401) -(0.04388210836652969, 0.09391711229946524) (0.04268150507603442, 0.0933422549347608) -(0.041551298589270716, 0.08895653644512092) (0.045356184468011054, 0.09665489998319045) -(0.04570014579074976, 0.09669771380186283) (0.047086365883282716, 0.10132974246759804) -(0.045210775454608235, 0.09725727746929161) (0.04590632840884281, 0.0993322203672788) -(0.0446139393887298, 0.09472803347280334) (0.042652613930647655, 0.09273182957393483) -(0.045129543140635865, 0.09833610648918469) (0.0460359568992712, 0.10095589468388395) -(0.04358612933479947, 0.09564489112227806) (0.04261383273870651, 0.09507042253521127) -(0.04128931422858646, 0.09077256799599533) (0.042503272384162856, 0.09485368314833502) -(0.043970261843005794, 0.09475096427972497) (0.042389636396742716, 0.09444351743700985) -(0.045238677585835635, 0.0971150620597115) (0.044671639993329695, 0.09896528704939919) -(0.04468767124491524, 0.09563025210084034) (0.044465484587960555, 0.09803273268308811) -(0.04545421956093857, 0.09889926617745164) (0.04260071358855361, 0.09299213915370463) -(0.04698144298129544, 0.09924812030075188) (0.0445872303203614, 0.09728544933400776) -(0.04698439506478846, 0.10084033613445378) (0.04227566283607216, 0.0933780385582565) -(0.04686287022782827, 0.09790444258172674) (0.04093423738573832, 0.08991962491627595) -(0.04207474937499147, 0.09171105193075899) (0.044001042677243744, 0.09548494983277592) -(0.04349252527387824, 0.09087899354411522) (0.04532052084971128, 0.10125936010891763) -(0.04504383099253152, 0.09564346519779669) (0.04471522343514236, 0.09889926617745164) -(0.04453584742104499, 0.09655402030963875) (0.043379123373391656, 0.09518233522917363) -(0.04734798778587445, 0.09855849815621857) (0.04627600879854919, 0.10107274555816292) -(0.046065615041243395, 0.09935680433310765) (0.046580859195749905, 0.10055583628094998) -(0.04310740337516884, 0.09306599832915623) (0.04411939335473692, 0.0962182061579652) -(0.04502056531710858, 0.09503784693019345) (0.04284339779169671, 0.09645010046885466) -(0.043917445943709575, 0.09433333333333334) (0.04107515513937971, 0.09063444108761329) -(0.04560131349859057, 0.09773441375888871) (0.04194879222157747, 0.09536556801070771) -(0.04851305172714049, 0.10114980836527246) (0.0435360059908385, 0.0948073701842546) -(0.04267579754388378, 0.09437113746450643) (0.04473541662223114, 0.09934662422516334) -(0.04798602951741334, 0.10063417890520694) (0.042088276239164016, 0.09445551128818061) -(0.04453145229737061, 0.09627277285642655) (0.04296047526336265, 0.09382924767540152) -(0.043139343527128796, 0.09169638308711156) (0.042095087278438165, 0.09701742627345844) diff --git a/SASRec/test.ipynb b/SASRec/test.ipynb index 1fcf76a..9eef931 100644 --- a/SASRec/test.ipynb +++ b/SASRec/test.ipynb @@ -1,5162 +1 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting pymysql\n", - " Downloading PyMySQL-1.1.0-py3-none-any.whl (44 kB)\n", - "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m44.8/44.8 kB\u001B[0m \u001B[31m504.4 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", - "\u001B[?25hInstalling collected packages: pymysql\n", - "Successfully installed pymysql-1.1.0\n" - ] - } - ], - "source": [ - "pip install pymysql" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "NKDyPANDcj7D", - "executionInfo": { - "status": "ok", - "timestamp": 1703384850058, - "user_tz": -540, - "elapsed": 13070, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "8ff71f23-074d-4684-afc8-011872a3740d", - "colab": { - "base_uri": "https://localhost:8080/" - } - } - }, - { - "cell_type": "code", - "execution_count": 3, - "outputs": [], - "source": [ - "import pymysql" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "WVE63KBbcj7D", - "executionInfo": { - "status": "ok", - "timestamp": 1703384850059, - "user_tz": -540, - "elapsed": 13, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - } - } - }, - { - "cell_type": "code", - "execution_count": 4, - "outputs": [], - "source": [ - "endpoint = \"pseudorec.cvhv2t0obyv3.ap-northeast-2.rds.amazonaws.com\"\n", - "port = 3306\n", - "user = \"admin\"\n", - "region = \"ap-northeast-2c\"\n", - "dbname = \"movielens25m\"\n", - "passwd = 'Precsys1!'\n", - "\n", - "# connection = pymysql.connect(host=endpoint, user=user, passwd=passwd, port=port,\n", - "# database=dbname)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "xZRkRRYucj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384850059, - "user_tz": -540, - "elapsed": 10, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - } - } - }, - { - "cell_type": "code", - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Mounted at /content/drive\n" - ] - } - ], - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nwzAtR3Ecj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384870906, - "user_tz": -540, - "elapsed": 20856, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "4098d371-17ce-483b-d1b1-5b6353f30d81" - } - }, - { - "cell_type": "code", - "execution_count": 6, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'/content'" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - } - }, - "metadata": {}, - "execution_count": 6 - } - ], - "source": [ - "import os\n", - "os.getcwd()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "id": "jLYz0Wx9cj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384875655, - "user_tz": -540, - "elapsed": 31, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "11ccdb2a-ef57-43b4-c9e0-d51900540881" - } - }, - { - "cell_type": "code", - "execution_count": 7, - "outputs": [], - "source": [ - "import os\n", - "os.chdir('/content/drive/MyDrive/000GithubRepos/recsys_key_papers_implementation/SASRec')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "uF4rBkGgcj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384876035, - "user_tz": -540, - "elapsed": 406, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - } - } - }, - { - "cell_type": "code", - "execution_count": 8, - "outputs": [], - "source": [ - "import pandas as pd" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "wqdPtiQucj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384876410, - "user_tz": -540, - "elapsed": 383, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - } - } - }, - { - "cell_type": "code", - "execution_count": 9, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['/content',\n", - " '/env/python',\n", - " '/usr/lib/python310.zip',\n", - " '/usr/lib/python3.10',\n", - " '/usr/lib/python3.10/lib-dynload',\n", - " '',\n", - " '/usr/local/lib/python3.10/dist-packages',\n", - " '/usr/lib/python3/dist-packages',\n", - " '/usr/local/lib/python3.10/dist-packages/IPython/extensions',\n", - " '/root/.ipython']" - ] - }, - "metadata": {}, - "execution_count": 9 - } - ], - "source": [ - "import sys\n", - "sys.path" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "e7emrLYncj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384876410, - "user_tz": -540, - "elapsed": 18, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "0217a2a5-d687-48dd-8c75-fe34dffaa8fa" - } - }, - { - "cell_type": "code", - "execution_count": 10, - "outputs": [], - "source": [ - "sys.path.append('../')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "QNnGfwwmcj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703384876410, - "user_tz": -540, - "elapsed": 14, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting boto3\n", - " Downloading boto3-1.34.7-py3-none-any.whl (139 kB)\n", - "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m139.3/139.3 kB\u001B[0m \u001B[31m1.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", - "\u001B[?25h" - ] - } - ], - "source": [ - "pip install boto3" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "tqYg8k9_cj7E", - "outputId": "63c90e3d-9500-4898-ca5e-e5d60cc23770", - "colab": { - "base_uri": "https://localhost:8080/" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "# 데이터 불러오기" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "KV0gagCTcj7E" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import boto3\n", - "import pandas as pd\n", - "import pymysql\n", - "from boto3.dynamodb.conditions import Key\n", - "# from dotenv import load_dotenv\n", - "\n", - "# load_dotenv()\n", - "\n", - "class MysqlClient:\n", - " def __init__(self):\n", - " self.endpoint = \"pseudorec.cvhv2t0obyv3.ap-northeast-2.rds.amazonaws.com\"\n", - " self.port = 3306\n", - " self.user = \"admin\"\n", - " self.region = \"ap-northeast-2c\"\n", - " self.dbname = \"movielens25m\"\n", - " self.passwd = 'Precsys1!'\n", - " os.environ['LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN'] = '1'\n", - " # self.connection = pymysql.connect(host=endpoint, user=user, passwd=passwd, port=port, database=dbname)\n", - "\n", - " def get_connection(self):\n", - " connection = pymysql.connect(host=self.endpoint, user=self.user, passwd=self.passwd, port=self.port,\n", - " database=self.dbname)\n", - " return connection\n", - "\n", - " def get_count(self, table_name):\n", - " with self.get_connection().cursor() as cursor:\n", - " cursor.execute(f\"select count(*) from {table_name}\")\n", - " return cursor.fetchall()[0][0]\n", - "\n", - " def get_movies(self):\n", - " with self.get_connection() as connection:\n", - " df = pd.read_sql(sql='select * from movies', con=connection)\n", - " return df\n", - "\n", - " def get_daum_movies(self):\n", - " with self.get_connection() as connection:\n", - " df = pd.read_sql(sql='select * from daum_movies', con=connection)\n", - " return df\n", - "\n", - " def get_daum_ratings(self):\n", - " with self.get_connection() as connection:\n", - " df = pd.read_sql(sql='select * from daum_ratings', con=connection)\n", - " return df\n", - "\n", - " def get_url(self, title):\n", - " with self.get_connection() as connection:\n", - " cursor = connection.cursor()\n", - " cursor.execute(f\"\"\"\n", - " select url from movies where title = '{title}'\n", - " \"\"\")\n", - " url = cursor.fetchall()[0][0]\n", - " return url\n", - "\n", - " def get_table_names(self):\n", - " print(\"Tables : \")\n", - " with self.get_connection().cursor() as cursor:\n", - " sql = \"SHOW TABLES\"\n", - " cursor.execute(sql)\n", - " result = cursor.fetchall()\n", - " for row in result:\n", - " print(row[0])\n", - "\n", - " def get_data_type(self, table_name):\n", - " with self.get_connection().cursor() as cursor:\n", - " cursor.execute(f\"SHOW COLUMNS FROM {table_name}\")\n", - " columns = cursor.fetchall()\n", - " for column in columns:\n", - " column_name = column[0]\n", - " data_type = column[1]\n", - " print(f\"Column: {column_name}, Data Type: {data_type}\")\n", - "\n", - "\n", - "class DynamoDB:\n", - " def __init__(self, table_name: str):\n", - " self.resource = boto3.resource(\n", - " 'dynamodb',\n", - " aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],\n", - " aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],\n", - " region_name=os.environ['AWS_REGION_NAME'],\n", - " )\n", - "\n", - " self.client = boto3.client(\n", - " 'dynamodb',\n", - " aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],\n", - " aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],\n", - " region_name=os.environ['AWS_REGION_NAME'],\n", - " )\n", - " self.table = self.resource.Table(table_name) # clicklog 테이블 등으로 연결\n", - "\n", - " def put_item(self, click_log):\n", - " resp = self.table.put_item(Item=click_log)\n", - "\n", - " def get_a_user_logs(self, user_name: str):\n", - " query = {\"KeyConditionExpression\": Key(\"userId\").eq(user_name)}\n", - " resp = self.table.query(**query)\n", - " return pd.DataFrame(resp['Items'])\n" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "akF5br0Dcj7E" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "mysql = MysqlClient()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "w7_oitsecj7E" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - ":39: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", - " df = pd.read_sql(sql='select * from daum_movies', con=connection)\n" - ] - } - ], - "source": [ - "daum_movies = mysql.get_daum_movies()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "z4smMQdpcj7E", - "executionInfo": { - "status": "ok", - "timestamp": 1703381652208, - "user_tz": -540, - "elapsed": 1164, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "7e163257-52a4-405a-d063-e241cd78c5b0" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " movieId titleKo \\\n", - "0 1 플란다스의 개 \n", - "1 2 카라 \n", - "2 3 주노명 베이커리 \n", - "3 4 여고괴담 두번째 이야기 \n", - "4 5 행복한 장의사 \n", - "... ... ... \n", - "26777 175738 파묘 \n", - "26778 175807 고질라 x 콩: 더 뉴 엠파이어 \n", - "26779 175838 장안궤사전 \n", - "26780 175990 실버 앤드 더 북 오브 드림스 \n", - "26781 176148 쿵푸팬더4 \n", - "\n", - " titleEn \\\n", - "0 Barking Dogs Never Bite 2000 \n", - "1 Calla 1999 \n", - "2 2000 \n", - "3 Memento Mori 1999 \n", - "4 2000 \n", - "... ... \n", - "26777 2022 \n", - "26778 Godzilla x Kong: The New Empire 2024 \n", - "26779 The Man's Secret 長安詭事傳 2023 \n", - "26780 Silver and the Book of Dreams Silber und das B... \n", - "26781 Kung Fu Panda 4 2024 \n", - "\n", - " mainPageUrl \\\n", - "0 https://movie.daum.net/moviedb/grade?movieId=1 \n", - "1 https://movie.daum.net/moviedb/grade?movieId=2 \n", - "2 https://movie.daum.net/moviedb/grade?movieId=3 \n", - "3 https://movie.daum.net/moviedb/grade?movieId=4 \n", - "4 https://movie.daum.net/moviedb/grade?movieId=5 \n", - "... ... \n", - "26777 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "26778 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "26779 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "26780 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "26781 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "\n", - " posterUrl numOfSiteRatings \n", - "0 https://img1.daumcdn.net/thumb/C408x596/?fname... 126.0 \n", - "1 https://img1.daumcdn.net/thumb/C408x596/?fname... 23.0 \n", - "2 None 15.0 \n", - "3 https://img1.daumcdn.net/thumb/C408x596/?fname... 74.0 \n", - "4 https://img1.daumcdn.net/thumb/C408x596/?fname... 53.0 \n", - "... ... ... \n", - "26777 https://img1.daumcdn.net/thumb/C408x596/?fname... 2.0 \n", - "26778 https://img1.daumcdn.net/thumb/C408x596/?fname... 1.0 \n", - "26779 https://img1.daumcdn.net/thumb/C408x596/?fname... 3.0 \n", - "26780 https://img1.daumcdn.net/thumb/C408x596/?fname... 3.0 \n", - "26781 https://img1.daumcdn.net/thumb/C408x596/?fname... 1.0 \n", - "\n", - "[26782 rows x 6 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitleKotitleEnmainPageUrlposterUrlnumOfSiteRatings
01플란다스의 개Barking Dogs Never Bite 2000https://movie.daum.net/moviedb/grade?movieId=1https://img1.daumcdn.net/thumb/C408x596/?fname...126.0
12카라Calla 1999https://movie.daum.net/moviedb/grade?movieId=2https://img1.daumcdn.net/thumb/C408x596/?fname...23.0
23주노명 베이커리2000https://movie.daum.net/moviedb/grade?movieId=3None15.0
34여고괴담 두번째 이야기Memento Mori 1999https://movie.daum.net/moviedb/grade?movieId=4https://img1.daumcdn.net/thumb/C408x596/?fname...74.0
45행복한 장의사2000https://movie.daum.net/moviedb/grade?movieId=5https://img1.daumcdn.net/thumb/C408x596/?fname...53.0
.....................
26777175738파묘2022https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...2.0
26778175807고질라 x 콩: 더 뉴 엠파이어Godzilla x Kong: The New Empire 2024https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...1.0
26779175838장안궤사전The Man's Secret 長安詭事傳 2023https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...3.0
26780175990실버 앤드 더 북 오브 드림스Silver and the Book of Dreams Silber und das B...https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...3.0
26781176148쿵푸팬더4Kung Fu Panda 4 2024https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...1.0
\n", - "

26782 rows × 6 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 53 - } - ], - "source": [ - "daum_movies" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 597 - }, - "id": "osx4U0ZTcj7F", - "executionInfo": { - "status": "ok", - "timestamp": 1703381652212, - "user_tz": -540, - "elapsed": 16, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "f3acbef9-7b91-4581-9d3a-432b3901b7c3" - } - }, - { - "cell_type": "markdown", - "source": [ - "# 테이블 총 개수" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "LLdujZ3icj7G" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "26782" - ] - }, - "metadata": {}, - "execution_count": 54 - } - ], - "source": [ - "mysql.get_count('daum_movies')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dH0HF-9mcj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381656460, - "user_tz": -540, - "elapsed": 498, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "96c765ac-75eb-4dd9-cccc-efb2d38f1756" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "381601" - ] - }, - "metadata": {}, - "execution_count": 55 - } - ], - "source": [ - "mysql.get_count('daum_ratings')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6ESUGsDMcj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381656461, - "user_tz": -540, - "elapsed": 13, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "5a4ec8c6-4709-440e-cdff-fe78511cff56" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - ":44: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", - " df = pd.read_sql(sql='select * from daum_ratings', con=connection)\n" - ] - } - ], - "source": [ - "daum_ratings = mysql.get_daum_ratings()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FiIIbDCacj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381664690, - "user_tz": -540, - "elapsed": 8236, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "4c854bb6-23c2-4b51-8255-63901a6660e1" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " nickName movieId rating timestamp userId \\\n", - "0 매력적그녀 121721 8 1.702394e+09 None \n", - "1 매력적그녀 1660 10 1.701700e+09 None \n", - "2 매력적그녀 164920 10 1.692027e+09 None \n", - "3 매력적그녀 130710 1 1.684073e+09 None \n", - "4 매력적그녀 1953 6 1.677165e+09 None \n", - "... ... ... ... ... ... \n", - "381596 사커홀릭 130908 1 1.636721e+09 None \n", - "381597 사커홀릭 129413 10 1.600674e+09 None \n", - "381598 사커홀릭 122091 10 1.580734e+09 None \n", - "381599 사커홀릭 54520 10 1.560259e+09 None \n", - "381600 휴면 사용자 47747 6 1.469438e+09 None \n", - "\n", - " review \n", - "0 이런 영화 안좋아하는데 어쩔수 없이 봤다가 솔직히 기분은 진짜 별로였음 근데 또 영... \n", - "1 첨엔 내가 주인공이고 이렇게 술술 풀리는 행복한 인생 부럽다 이러다가 갈수록 숨막히... \n", - "2 처음엔 넷플에.떴길래 별 생각없이 보기 시작했는데 너무 잔잔해서 뭐지 하다가 이상하... \n", - "3 이거 왜 영화로 만든건가요? \n", - "4 당시엔 평이 엄청 낮고 여주 혹평이던 영화도 시간이 지나면 이렇게 평점이 좋아지는구... \n", - "... ... \n", - "381596 보다가 졸음이 온 첫 마블영화. 왜구 원폭피해국 홍보영화 \n", - "381597 최고입니다. 누군가들에게는 불편할 듯 ... 보는 내내 찜찜하고 불편했지만 현실세계... \n", - "381598 연기자들의 연기에 찬사를 드립니다~. 다음번엔 궁정동의 여인들도 만들어주세요! \n", - "381599 힐링하고 싶을때 꺼내봅니다. 볼 때마다 놓쳤던 부분을 발견하며 감동이 더하네요. 월... \n", - "381600 진부한 복제와 속임수 \n", - "\n", - "[381601 rows x 6 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nickNamemovieIdratingtimestampuserIdreview
0매력적그녀12172181.702394e+09None이런 영화 안좋아하는데 어쩔수 없이 봤다가 솔직히 기분은 진짜 별로였음 근데 또 영...
1매력적그녀1660101.701700e+09None첨엔 내가 주인공이고 이렇게 술술 풀리는 행복한 인생 부럽다 이러다가 갈수록 숨막히...
2매력적그녀164920101.692027e+09None처음엔 넷플에.떴길래 별 생각없이 보기 시작했는데 너무 잔잔해서 뭐지 하다가 이상하...
3매력적그녀13071011.684073e+09None이거 왜 영화로 만든건가요?
4매력적그녀195361.677165e+09None당시엔 평이 엄청 낮고 여주 혹평이던 영화도 시간이 지나면 이렇게 평점이 좋아지는구...
.....................
381596사커홀릭13090811.636721e+09None보다가 졸음이 온 첫 마블영화. 왜구 원폭피해국 홍보영화
381597사커홀릭129413101.600674e+09None최고입니다. 누군가들에게는 불편할 듯 ... 보는 내내 찜찜하고 불편했지만 현실세계...
381598사커홀릭122091101.580734e+09None연기자들의 연기에 찬사를 드립니다~. 다음번엔 궁정동의 여인들도 만들어주세요!
381599사커홀릭54520101.560259e+09None힐링하고 싶을때 꺼내봅니다. 볼 때마다 놓쳤던 부분을 발견하며 감동이 더하네요. 월...
381600휴면 사용자4774761.469438e+09None진부한 복제와 속임수
\n", - "

381601 rows × 6 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 57 - } - ], - "source": [ - "daum_ratings" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "SFUhJn8lcj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381664691, - "user_tz": -540, - "elapsed": 28, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "07520091-3c9b-48f1-b946-3364d792d059" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "nickName 23894\n", - "movieId 26636\n", - "rating 11\n", - "timestamp 362438\n", - "userId 0\n", - "review 356470\n", - "dtype: int64" - ] - }, - "metadata": {}, - "execution_count": 58 - } - ], - "source": [ - "daum_ratings.nunique()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2XGHWn-bcj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381664692, - "user_tz": -540, - "elapsed": 24, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "b4c9f1ae-2bf5-46fd-99f0-e8b34900559a" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from collections import Counter\n", - "nn_cntr = Counter(daum_ratings['nickName'])" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "ell1p1Mhcj7G" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('휴면 사용자', 10798),\n", - " ('', 2731),\n", - " ('닉네임을 등록해 주세요', 1998),\n", - " ('붉은돼지', 680),\n", - " ('이리', 631),\n", - " ('koh501', 630),\n", - " ('버닝샌더스', 630),\n", - " ('dArKRuSh', 630),\n", - " ('Peter_L', 630),\n", - " ('달빛의그림자', 630),\n", - " ('빨간다라이', 630),\n", - " ('하늘바람별', 630),\n", - " ('이상', 630),\n", - " ('뒤마페르', 629),\n", - " ('Tough Cookie', 629),\n", - " ('잉여인간', 628),\n", - " ('재밌는놀이', 628),\n", - " ('야수죽이기', 627),\n", - " ('김덕뱀다', 564),\n", - " ('르네루소', 562),\n", - " ('L_H_K_', 552),\n", - " ('땅콩샌드', 547),\n", - " ('나는 단호하게 반대한다', 539),\n", - " ('다함께', 527),\n", - " ('김동혁', 523),\n", - " ('억수로', 465),\n", - " ('이지숙', 458),\n", - " ('즐거운인생', 444),\n", - " ('one', 412),\n", - " ('닉네임', 395),\n", - " ('하바별시', 387),\n", - " ('아다나', 385),\n", - " ('탁선생', 361),\n", - " ('지수', 354),\n", - " ('tachyon', 341),\n", - " ('백호', 339),\n", - " ('해명', 337),\n", - " ('대한민국', 336),\n", - " ('오늘', 334),\n", - " ('Freeman', 331),\n", - " ('lemmy', 330),\n", - " ('즐산', 330),\n", - " ('문향서기', 330),\n", - " ('Yungeotgom', 330),\n", - " ('꽉이', 330),\n", - " ('kingofjoy', 330),\n", - " ('류관원', 330),\n", - " ('감찬', 330),\n", - " ('goodmodel', 330),\n", - " ('바크', 330)]" - ] - }, - "metadata": {}, - "execution_count": 60 - } - ], - "source": [ - "sorted(nn_cntr.items(), key=lambda x: x[1], reverse=True)[:50]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "d7AyJW3Ocj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381670023, - "user_tz": -540, - "elapsed": 12, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "2780f7a8-e99a-496b-a1c6-07a6d34d4aa2" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "daum_movies" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "5C5eF3pocj7G" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "available_user = daum_ratings[daum_ratings['nickName'].map(lambda x: x not in ['휴면 사용자', '', '닉네임을 등록해 주세요', '닉네임'])]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "t-SF-nnAcj7G" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " nickName movieId rating timestamp userId \\\n", - "0 매력적그녀 121721 8 1.702394e+09 None \n", - "1 매력적그녀 1660 10 1.701700e+09 None \n", - "2 매력적그녀 164920 10 1.692027e+09 None \n", - "3 매력적그녀 130710 1 1.684073e+09 None \n", - "4 매력적그녀 1953 6 1.677165e+09 None \n", - "... ... ... ... ... ... \n", - "381595 참고로 93503 8 1.498874e+09 None \n", - "381596 사커홀릭 130908 1 1.636721e+09 None \n", - "381597 사커홀릭 129413 10 1.600674e+09 None \n", - "381598 사커홀릭 122091 10 1.580734e+09 None \n", - "381599 사커홀릭 54520 10 1.560259e+09 None \n", - "\n", - " review \n", - "0 이런 영화 안좋아하는데 어쩔수 없이 봤다가 솔직히 기분은 진짜 별로였음 근데 또 영... \n", - "1 첨엔 내가 주인공이고 이렇게 술술 풀리는 행복한 인생 부럽다 이러다가 갈수록 숨막히... \n", - "2 처음엔 넷플에.떴길래 별 생각없이 보기 시작했는데 너무 잔잔해서 뭐지 하다가 이상하... \n", - "3 이거 왜 영화로 만든건가요? \n", - "4 당시엔 평이 엄청 낮고 여주 혹평이던 영화도 시간이 지나면 이렇게 평점이 좋아지는구... \n", - "... ... \n", - "381595 데이빗의 변 \n", - "381596 보다가 졸음이 온 첫 마블영화. 왜구 원폭피해국 홍보영화 \n", - "381597 최고입니다. 누군가들에게는 불편할 듯 ... 보는 내내 찜찜하고 불편했지만 현실세계... \n", - "381598 연기자들의 연기에 찬사를 드립니다~. 다음번엔 궁정동의 여인들도 만들어주세요! \n", - "381599 힐링하고 싶을때 꺼내봅니다. 볼 때마다 놓쳤던 부분을 발견하며 감동이 더하네요. 월... \n", - "\n", - "[365679 rows x 6 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nickNamemovieIdratingtimestampuserIdreview
0매력적그녀12172181.702394e+09None이런 영화 안좋아하는데 어쩔수 없이 봤다가 솔직히 기분은 진짜 별로였음 근데 또 영...
1매력적그녀1660101.701700e+09None첨엔 내가 주인공이고 이렇게 술술 풀리는 행복한 인생 부럽다 이러다가 갈수록 숨막히...
2매력적그녀164920101.692027e+09None처음엔 넷플에.떴길래 별 생각없이 보기 시작했는데 너무 잔잔해서 뭐지 하다가 이상하...
3매력적그녀13071011.684073e+09None이거 왜 영화로 만든건가요?
4매력적그녀195361.677165e+09None당시엔 평이 엄청 낮고 여주 혹평이던 영화도 시간이 지나면 이렇게 평점이 좋아지는구...
.....................
381595참고로9350381.498874e+09None데이빗의 변
381596사커홀릭13090811.636721e+09None보다가 졸음이 온 첫 마블영화. 왜구 원폭피해국 홍보영화
381597사커홀릭129413101.600674e+09None최고입니다. 누군가들에게는 불편할 듯 ... 보는 내내 찜찜하고 불편했지만 현실세계...
381598사커홀릭122091101.580734e+09None연기자들의 연기에 찬사를 드립니다~. 다음번엔 궁정동의 여인들도 만들어주세요!
381599사커홀릭54520101.560259e+09None힐링하고 싶을때 꺼내봅니다. 볼 때마다 놓쳤던 부분을 발견하며 감동이 더하네요. 월...
\n", - "

365679 rows × 6 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 62 - } - ], - "source": [ - "available_user" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "P9qO9yLGcj7G", - "executionInfo": { - "status": "ok", - "timestamp": 1703381715554, - "user_tz": -540, - "elapsed": 499, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "d9349e72-dc64-4c18-b82c-15c279126a60" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "merged = pd.merge(left=available_user, right=daum_movies, how='left', on='movieId')[['nickName', 'movieId', 'titleKo','rating', 'timestamp', 'numOfSiteRatings']]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "wG9VkLkZcj7G" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "merged" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "kJkkNuVZcj7G" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "average_ratings = merged.groupby('movieId')['rating'].mean().reset_index()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "6yP1ggJ1cj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "rating_mean_dict = dict(zip(average_ratings['movieId'], average_ratings['rating']))" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "4vnWCBYPcj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "rating_mean_dict[128434]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "M5Ha5ipZcj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from collections import Counter\n", - "rating_num_dict = Counter(merged['movieId'])" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "gLCS2SSncj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "rating_num_dict" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "qGMFwaHRcj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "daum_movies['rating_mean'] = daum_movies['movieId'].map(rating_mean_dict)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "cZjdF5Z9cj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "daum_movies[daum_movies['movieId'] == 128434]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "mKuDxXm-cj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "daum_movies['num_of_collected_ratings'] = daum_movies['movieId'].map(rating_num_dict)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "BtjbqH-pcj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "daum_movies" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "YGZjo9hKcj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "daum_ratings[daum_ratings['movieId'] == 128434]['rating'].mean()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "fw7EZBkycj7H" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " movieId titleKo \\\n", - "585 1368 쇼생크 탈출 \n", - "18698 111299 더 플랜 \n", - "18732 111495 저수지 게임 \n", - "13383 75497 또 하나의 약속 \n", - "18990 113317 공범자들 \n", - "927 1911 터미네이터2 \n", - "20172 121059 가버나움 \n", - "13468 76325 변호인 \n", - "17450 102536 자백 \n", - "12108 66814 부러진 화살 \n", - "21606 130923 김복동 \n", - "476 1173 레옹 디 오리지널 \n", - "15898 91662 귀향 \n", - "7799 43188 월-E \n", - "19 21 8월의 크리스마스 \n", - "18316 108944 노무현입니다 \n", - "17492 102840 나, 다니엘 블레이크 \n", - "12754 70845 MB의 추억 \n", - "18263 108595 1987 \n", - "2057 3972 클래식 \n", - "19942 119688 그날 바다 \n", - "15463 88598 다이빙벨 \n", - "6821 41156 브이 포 벤데타 \n", - "1963 3752 피아니스트 \n", - "20586 123948 그린 북 \n", - "18292 108740 원더 \n", - "13825 78539 천안함 프로젝트 \n", - "2157 4238 살인의 추억 \n", - "12848 71486 남영동1985 \n", - "17618 103818 무현, 두 도시 이야기 \n", - "\n", - " titleEn \\\n", - "585 The Shawshank Redemption 1994 \n", - "18698 None \n", - "18732 None \n", - "13383 Another Family 2013 \n", - "18990 None \n", - "927 Terminator 2 : Judgment Day Terminator 2 - Le ... \n", - "20172 Capernaum Capharnaüm 2018 \n", - "13468 The Attorney 2013 \n", - "17450 None \n", - "12108 Unbowed 2011 \n", - "21606 None \n", - "476 None \n", - "15898 None \n", - "7799 Wall-E 2008 \n", - "19 Christmas in August 1998 \n", - "18316 None \n", - "17492 None \n", - "12754 Remembrance of MB 2012 \n", - "18263 None \n", - "2057 The Classic 2002 \n", - "19942 Intention 2018 \n", - "15463 None \n", - "6821 V for Vendetta 2005 \n", - "1963 The Pianist Le Pianiste 2002 \n", - "20586 None \n", - "18292 None \n", - "13825 None \n", - "2157 Memories of Murder 2003 \n", - "12848 Namyeong-dong1985 2012 \n", - "17618 None \n", - "\n", - " mainPageUrl \\\n", - "585 https://movie.daum.net/moviedb/grade?movieId=1368 \n", - "18698 https://movie.daum.net/moviedb/main?movieId=11... \n", - "18732 https://movie.daum.net/moviedb/main?movieId=11... \n", - "13383 https://movie.daum.net/moviedb/grade?movieId=7... \n", - "18990 https://movie.daum.net/moviedb/main?movieId=11... \n", - "927 https://movie.daum.net/moviedb/grade?movieId=1911 \n", - "20172 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "13468 https://movie.daum.net/moviedb/grade?movieId=7... \n", - "17450 https://movie.daum.net/moviedb/main?movieId=10... \n", - "12108 https://movie.daum.net/moviedb/grade?movieId=6... \n", - "21606 https://movie.daum.net/moviedb/main?movieId=13... \n", - "476 https://movie.daum.net/moviedb/main?movieId=1173 \n", - "15898 https://movie.daum.net/moviedb/main?movieId=91662 \n", - "7799 https://movie.daum.net/moviedb/grade?movieId=4... \n", - "19 https://movie.daum.net/moviedb/grade?movieId=21 \n", - "18316 https://movie.daum.net/moviedb/main?movieId=10... \n", - "17492 https://movie.daum.net/moviedb/main?movieId=10... \n", - "12754 https://movie.daum.net/moviedb/grade?movieId=7... \n", - "18263 https://movie.daum.net/moviedb/main?movieId=10... \n", - "2057 https://movie.daum.net/moviedb/grade?movieId=3972 \n", - "19942 https://movie.daum.net/moviedb/grade?movieId=1... \n", - "15463 https://movie.daum.net/moviedb/main?movieId=88598 \n", - "6821 https://movie.daum.net/moviedb/grade?movieId=4... \n", - "1963 https://movie.daum.net/moviedb/grade?movieId=3752 \n", - "20586 https://movie.daum.net/moviedb/main?movieId=12... \n", - "18292 https://movie.daum.net/moviedb/main?movieId=10... \n", - "13825 https://movie.daum.net/moviedb/main?movieId=78539 \n", - "2157 https://movie.daum.net/moviedb/grade?movieId=4238 \n", - "12848 https://movie.daum.net/moviedb/grade?movieId=7... \n", - "17618 https://movie.daum.net/moviedb/main?movieId=10... \n", - "\n", - " posterUrl numOfSiteRatings \\\n", - "585 https://img1.daumcdn.net/thumb/C408x596/?fname... 1613.0 \n", - "18698 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "18732 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "13383 https://img1.daumcdn.net/thumb/C408x596/?fname... 4495.0 \n", - "18990 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "927 https://img1.daumcdn.net/thumb/C408x596/?fname... 912.0 \n", - "20172 https://img1.daumcdn.net/thumb/C408x596/?fname... 553.0 \n", - "13468 https://img1.daumcdn.net/thumb/C408x596/?fname... 34631.0 \n", - "17450 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "12108 https://img1.daumcdn.net/thumb/C408x596/?fname... 3071.0 \n", - "21606 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "476 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "15898 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "7799 https://img1.daumcdn.net/thumb/C408x596/?fname... 1125.0 \n", - "19 https://img1.daumcdn.net/thumb/C408x596/?fname... 703.0 \n", - "18316 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "17492 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "12754 https://img1.daumcdn.net/thumb/C408x596/?fname... 1490.0 \n", - "18263 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "2057 https://img1.daumcdn.net/thumb/C408x596/?fname... 4875.0 \n", - "19942 https://img1.daumcdn.net/thumb/C408x596/?fname... 5260.0 \n", - "15463 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "6821 https://img1.daumcdn.net/thumb/C408x596/?fname... 1158.0 \n", - "1963 https://img1.daumcdn.net/thumb/C408x596/?fname... 727.0 \n", - "20586 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "18292 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "13825 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "2157 https://img1.daumcdn.net/thumb/C408x596/?fname... 1757.0 \n", - "12848 https://img1.daumcdn.net/thumb/C408x596/?fname... 1811.0 \n", - "17618 https://img1.daumcdn.net/thumb/C408x596/?fname... NaN \n", - "\n", - " rating_mean num_of_collected_ratings \n", - "585 9.758427 178 \n", - "18698 9.693487 261 \n", - "18732 9.657143 140 \n", - "13383 9.645985 274 \n", - "18990 9.623810 210 \n", - "927 9.593220 118 \n", - "20172 9.582192 146 \n", - "13468 9.580556 1440 \n", - "17450 9.572222 180 \n", - "12108 9.521739 161 \n", - "21606 9.518868 106 \n", - "476 9.508621 116 \n", - "15898 9.503797 395 \n", - "7799 9.500000 104 \n", - "19 9.471074 121 \n", - "18316 9.459524 420 \n", - "17492 9.446429 112 \n", - "12754 9.441176 102 \n", - "18263 9.420997 943 \n", - "2057 9.380435 184 \n", - "19942 9.370000 400 \n", - "15463 9.330218 321 \n", - "6821 9.312977 131 \n", - "1963 9.290909 110 \n", - "20586 9.289683 252 \n", - "18292 9.279279 111 \n", - "13825 9.275109 229 \n", - "2157 9.271357 199 \n", - "12848 9.266667 150 \n", - "17618 9.260204 196 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movieIdtitleKotitleEnmainPageUrlposterUrlnumOfSiteRatingsrating_meannum_of_collected_ratings
5851368쇼생크 탈출The Shawshank Redemption 1994https://movie.daum.net/moviedb/grade?movieId=1368https://img1.daumcdn.net/thumb/C408x596/?fname...1613.09.758427178
18698111299더 플랜Nonehttps://movie.daum.net/moviedb/main?movieId=11...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.693487261
18732111495저수지 게임Nonehttps://movie.daum.net/moviedb/main?movieId=11...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.657143140
1338375497또 하나의 약속Another Family 2013https://movie.daum.net/moviedb/grade?movieId=7...https://img1.daumcdn.net/thumb/C408x596/?fname...4495.09.645985274
18990113317공범자들Nonehttps://movie.daum.net/moviedb/main?movieId=11...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.623810210
9271911터미네이터2Terminator 2 : Judgment Day Terminator 2 - Le ...https://movie.daum.net/moviedb/grade?movieId=1911https://img1.daumcdn.net/thumb/C408x596/?fname...912.09.593220118
20172121059가버나움Capernaum Capharnaüm 2018https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...553.09.582192146
1346876325변호인The Attorney 2013https://movie.daum.net/moviedb/grade?movieId=7...https://img1.daumcdn.net/thumb/C408x596/?fname...34631.09.5805561440
17450102536자백Nonehttps://movie.daum.net/moviedb/main?movieId=10...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.572222180
1210866814부러진 화살Unbowed 2011https://movie.daum.net/moviedb/grade?movieId=6...https://img1.daumcdn.net/thumb/C408x596/?fname...3071.09.521739161
21606130923김복동Nonehttps://movie.daum.net/moviedb/main?movieId=13...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.518868106
4761173레옹 디 오리지널Nonehttps://movie.daum.net/moviedb/main?movieId=1173https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.508621116
1589891662귀향Nonehttps://movie.daum.net/moviedb/main?movieId=91662https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.503797395
779943188월-EWall-E 2008https://movie.daum.net/moviedb/grade?movieId=4...https://img1.daumcdn.net/thumb/C408x596/?fname...1125.09.500000104
19218월의 크리스마스Christmas in August 1998https://movie.daum.net/moviedb/grade?movieId=21https://img1.daumcdn.net/thumb/C408x596/?fname...703.09.471074121
18316108944노무현입니다Nonehttps://movie.daum.net/moviedb/main?movieId=10...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.459524420
17492102840나, 다니엘 블레이크Nonehttps://movie.daum.net/moviedb/main?movieId=10...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.446429112
1275470845MB의 추억Remembrance of MB 2012https://movie.daum.net/moviedb/grade?movieId=7...https://img1.daumcdn.net/thumb/C408x596/?fname...1490.09.441176102
182631085951987Nonehttps://movie.daum.net/moviedb/main?movieId=10...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.420997943
20573972클래식The Classic 2002https://movie.daum.net/moviedb/grade?movieId=3972https://img1.daumcdn.net/thumb/C408x596/?fname...4875.09.380435184
19942119688그날 바다Intention 2018https://movie.daum.net/moviedb/grade?movieId=1...https://img1.daumcdn.net/thumb/C408x596/?fname...5260.09.370000400
1546388598다이빙벨Nonehttps://movie.daum.net/moviedb/main?movieId=88598https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.330218321
682141156브이 포 벤데타V for Vendetta 2005https://movie.daum.net/moviedb/grade?movieId=4...https://img1.daumcdn.net/thumb/C408x596/?fname...1158.09.312977131
19633752피아니스트The Pianist Le Pianiste 2002https://movie.daum.net/moviedb/grade?movieId=3752https://img1.daumcdn.net/thumb/C408x596/?fname...727.09.290909110
20586123948그린 북Nonehttps://movie.daum.net/moviedb/main?movieId=12...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.289683252
18292108740원더Nonehttps://movie.daum.net/moviedb/main?movieId=10...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.279279111
1382578539천안함 프로젝트Nonehttps://movie.daum.net/moviedb/main?movieId=78539https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.275109229
21574238살인의 추억Memories of Murder 2003https://movie.daum.net/moviedb/grade?movieId=4238https://img1.daumcdn.net/thumb/C408x596/?fname...1757.09.271357199
1284871486남영동1985Namyeong-dong1985 2012https://movie.daum.net/moviedb/grade?movieId=7...https://img1.daumcdn.net/thumb/C408x596/?fname...1811.09.266667150
17618103818무현, 두 도시 이야기Nonehttps://movie.daum.net/moviedb/main?movieId=10...https://img1.daumcdn.net/thumb/C408x596/?fname...NaN9.260204196
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 72 - } - ], - "source": [ - "daum_movies[daum_movies['num_of_collected_ratings']>100].sort_values('rating_mean', ascending=False).head(30)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "-pwiAudTcj7H", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "executionInfo": { - "status": "ok", - "timestamp": 1703382059698, - "user_tz": -540, - "elapsed": 1464, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "59682c8e-5df9-4898-d55b-421467208b89" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[(None, 1441),\n", - " ('변호인', 1440),\n", - " ('기생충', 1233),\n", - " ('1987', 943),\n", - " ('반도', 797),\n", - " ('극한직업', 793),\n", - " ('백두산', 785),\n", - " ('남산의 부장들', 741),\n", - " ('강철비2: 정상회담', 728),\n", - " ('신과함께-죄와 벌', 726),\n", - " ('택시운전사', 711),\n", - " ('봉오동 전투', 689),\n", - " ('군함도', 676),\n", - " ('조커', 675),\n", - " ('82년생 김지영', 670),\n", - " ('헌트', 669),\n", - " ('승리호', 656),\n", - " ('명량', 653),\n", - " ('안시성', 634),\n", - " ('서울의 봄', 630),\n", - " ('부산행', 625),\n", - " ('인터스텔라', 613),\n", - " ('엑시트', 609),\n", - " ('어벤져스: 엔드게임', 603),\n", - " ('보헤미안 랩소디', 585),\n", - " ('곡성', 584),\n", - " ('다만 악에서 구하소서', 582),\n", - " ('암살', 560),\n", - " ('마녀', 555),\n", - " ('국제시장', 530),\n", - " ('강철비', 527),\n", - " ('탑건: 매버릭', 525),\n", - " ('공작', 523),\n", - " ('신과함께-인과 연', 520),\n", - " ('설국열차', 509),\n", - " ('한산: 용의 출현', 506),\n", - " ('#살아있다', 499),\n", - " ('비상선언', 493),\n", - " ('외계+인 1부', 490),\n", - " ('26년', 490),\n", - " ('캡틴 마블', 480),\n", - " ('범죄도시2', 479),\n", - " ('아수라', 470),\n", - " ('인천상륙작전', 458),\n", - " ('헤어질 결심', 456),\n", - " ('범죄도시3', 454),\n", - " ('베테랑', 447),\n", - " ('모가디슈', 441),\n", - " ('내부자들', 436),\n", - " ('낙원의 밤', 435),\n", - " ('더 킹', 434),\n", - " ('나랏말싸미', 432),\n", - " ('연평해전', 423),\n", - " ('노무현입니다', 420),\n", - " ('불한당: 나쁜 놈들의 세상', 419),\n", - " ('사바하', 417),\n", - " ('히트맨', 415),\n", - " ('귀향', 415),\n", - " ('길복순', 412),\n", - " ('범죄도시', 409),\n", - " ('인랑', 408),\n", - " ('증인', 403),\n", - " ('알라딘', 402),\n", - " ('터미네이터: 다크 페이트', 402),\n", - " ('그날 바다', 400),\n", - " ('마녀(魔女) Part2. The Other One', 398),\n", - " ('아바타: 물의 길', 392),\n", - " ('걸캅스', 389),\n", - " ('밀정', 384),\n", - " ('매드 맥스: 분노의 도로', 381),\n", - " ('독전', 379),\n", - " ('뮬란', 377),\n", - " ('군도:민란의 시대', 376),\n", - " ('어벤져스: 인피니티 워', 376),\n", - " ('아가씨', 375),\n", - " ('국가부도의 날', 373),\n", - " ('나를 찾아줘', 370),\n", - " ('영웅', 365),\n", - " ('악인전', 364),\n", - " ('블랙머니', 364),\n", - " ('겨울왕국 2', 364),\n", - " ('밀수', 358),\n", - " ('지푸라기라도 잡고 싶은 짐승들', 358),\n", - " ('콘크리트 유토피아', 355),\n", - " ('카터', 354),\n", - " ('완벽한 타인', 354),\n", - " ('테넷', 353),\n", - " ('남한산성', 352),\n", - " ('말모이', 351),\n", - " ('그것만이 내 세상', 350),\n", - " ('사냥의 시간', 348),\n", - " ('아저씨', 347),\n", - " ('아쿠아맨', 344),\n", - " ('천문: 하늘에 묻는다', 343),\n", - " ('시동', 341),\n", - " ('타짜: 원 아이드 잭', 337),\n", - " ('이터널스', 334),\n", - " ('조작된 도시', 328),\n", - " ('사도', 328),\n", - " ('버닝', 325)]" - ] - }, - "metadata": {}, - "execution_count": 73 - } - ], - "source": [ - "Counter(merged['titleKo']).most_common(100)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "v0ZgUyNecj7H", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "status": "ok", - "timestamp": 1703382065832, - "user_tz": -540, - "elapsed": 520, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - } - }, - "outputId": "e7703bea-3b79-4955-d37e-c4bcdf0f1536" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# # movie_dictionary\n", - "# movies = pd.read_table('../data/ml-1m_grouplens/movies.dat', sep='::', header=None, names=['movie_id', 'title', 'genres'],\n", - "# engine='python', encoding_errors='ignore')\n", - "# movie_dict = movies.to_dict('index')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "dH2kotujcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# # movie_dictionary\n", - "# movies = pd.read_table('../data/ml-1m_grouplens/movies.dat', sep='::', header=None, names=['movie_id', 'title', 'genres'],\n", - "# engine='python', encoding_errors='ignore')\n", - "# movie_dict = movies.to_dict('index')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "quOskqH_cj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# ratings = pd.read_table('../data/ml-1m_grouplens/ratings.dat', sep='::', header=None, names=['userid', 'movieid', 'rating', 'timestamp'],\n", - "# engine='python', encoding_errors='ignore')" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "uY2MvbFXcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "merged.nunique()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "KRdqOZ4Vcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "user_id_dict = {nn:i for i, nn in enumerate(merged['nickName'].unique())}" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "sm06Rn9acj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "movie_id_dict = {site_mid:i for i, site_mid in enumerate(merged['movieId'].unique())}" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "KTQpxXAxcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "merged['uid'] = merged['nickName'].map(user_id_dict)\n", - "merged['iid'] = merged['movieId'].map(movie_id_dict)\n", - "merged" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "NvblNmERcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from datetime import datetime" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "lKtgdOmVcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "merged['dt'] = merged['timestamp'].map(lambda x: datetime.fromtimestamp(x))" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "1PxKRQiDcj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "merged" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "9RWQRR64cj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "sorted_iid_lists = merged.sort_values(by=['uid', 'timestamp']).groupby('uid')['iid'].apply(list)\n", - "sorted_iid_lists" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "Aak5Mi61cj7I" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "sorted_iid_dict = dict(sorted_iid_lists)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "W34KxAQBcj7I" - } - }, - { - "cell_type": "markdown", - "source": [ - "# 모델 준비" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "HcqdE8eqcj7J" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import time\n", - "import torch\n", - "from torch import nn\n", - "import argparse\n", - "from model import SASRec\n", - "from utils import *" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "3yXU5MQScj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "def str2bool(s):\n", - " if s not in {'false', 'true'}:\n", - " raise ValueError('Not a valid boolean string')\n", - " return s == 'true'\n", - "\n", - "def get_args():\n", - " parser = argparse.ArgumentParser()\n", - " parser.add_argument('--ratings_dir', required=True)\n", - " parser.add_argument('--model_output_dir', required=True)\n", - " parser.add_argument('--batch_size', default=128, type=int)\n", - " parser.add_argument('--lr', default=0.001, type=float)\n", - " parser.add_argument('--maxlen', default=50, type=int)\n", - " parser.add_argument('--hidden_units', default=50, type=int)\n", - " parser.add_argument('--num_blocks', default=2, type=int)\n", - " parser.add_argument('--num_epochs', default=200, type=int)\n", - " parser.add_argument('--num_heads', default=1, type=int)\n", - " parser.add_argument('--dropout_rate', default=0.5, type=float)\n", - " parser.add_argument('--l2_emb', default=0.0, type=float)\n", - " parser.add_argument('--device', default='cpu', type=str)\n", - " parser.add_argument('--inference_only', default=False, type=str2bool)\n", - " parser.add_argument('--state_dict_path', default=None, type=str)\n", - " return parser.parse_args(args=['--ratings_dir', '../data/ml-1m_grouplens/ratings.dat', '--model_output_dir', 'model_output'])\n", - "args = get_args()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "dPvBk_yfcj7K" - } - }, - { - "cell_type": "markdown", - "source": [ - "# 학습모델 저장 경로" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "wETEVDG_cj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "print(f\"args.model_output_dir : {args.model_output_dir}\")\n", - "if not os.path.exists(args.model_output_dir):\n", - " os.mkdir(args.model_output_dir)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "oaC-scxWcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# [str(k) + ',' + str(v)\n", - "for k, v in sorted(vars(args).items(), key=lambda x: x[0]):\n", - " print(f\"{k:30} : {str(v):20}\")" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "s8L3ScvGcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "args.dataset = 'daum'" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "QKkdzEOQcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "args.train_dir = 'train_dir'" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "ObpkC9hjcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if not os.path.isdir(args.dataset + '_' + args.train_dir): # 데이터 없다면 폴더 만듦\n", - " os.makedirs(args.dataset + '_' + args.train_dir)\n", - " print(f\"made {args.dataset + '_' + args.train_dir} folder\")\n", - "with open(os.path.join(args.dataset + '_' + args.train_dir, 'args.txt'), 'w') as f: # argument 저장\n", - " f.write('\\n'.join([str(k) + ',' + str(v) for k, v in sorted(vars(args).items(), key=lambda x: x[0])]))\n", - " print(f\"wrote '{os.path.join(args.dataset + '_' + args.train_dir, 'args.txt')}'\")\n", - "f.close()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "s0jx1kp2cj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "args.ratings_dir" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "P-I8I_WDcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from collections import defaultdict" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "TC3EGiGPcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "f = open(args.ratings_dir, 'r')\n", - "user_seqs = defaultdict(list)\n", - "for line in f.readlines():\n", - " data_lst = line.replace('\\n', '').split('::')\n", - " if len(data_lst) < 4:\n", - " continue\n", - " userid, movieid, rating, timestamp = [int(data) for data in line.replace('\\n', '').split('::')]\n", - " user_seqs[userid].append((movieid, timestamp))\n", - "\n", - "f.close()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "B-vj4qrrcj7K" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "min(user_seqs.keys())" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "5cwBpZsHcj7L" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "max(user_seqs.keys())" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "zAfwMnI-cj7L" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "\n", - "len(user_seqs.keys())" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "JnLb_c5Pcj7L" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "for userid in list(user_seqs.keys()):\n", - " sorted_user_seq = sorted(user_seqs[userid], key=lambda x: x[1])\n", - " sorted_user_seq = [movieid for movieid, timestamp in sorted_user_seq]\n", - " user_seqs[userid] = sorted_user_seq" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "w8WBj-y5cj7L" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "# train/val/test data generation\n", - "def data_partition(fname):\n", - " usernum = 0\n", - " itemnum = 0\n", - " User = defaultdict(list)\n", - " user_train = {}\n", - " user_valid = {}\n", - " user_test = {}\n", - " # assume user/item index starting from 1\n", - " f = open(f'{fname}')\n", - " for line in f:\n", - " u, i = line.rstrip().split(' ')\n", - " u = int(u)\n", - " i = int(i)\n", - " usernum = max(u, usernum)\n", - " itemnum = max(i, itemnum)\n", - " User[u].append(i)\n", - "\n", - " for user in User:\n", - " nfeedback = len(User[user])\n", - " if nfeedback < 3:\n", - " user_train[user] = User[user]\n", - " user_valid[user] = []\n", - " user_test[user] = []\n", - " else:\n", - " user_train[user] = User[user][:-2]\n", - " user_valid[user] = []\n", - " user_valid[user].append(User[user][-2])\n", - " user_test[user] = []\n", - " user_test[user].append(User[user][-1])\n", - " return [user_train, user_valid, user_test, usernum, itemnum]\n", - "\n", - "dataset = data_partition(args.dataset)" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "F3dQNoQIcj7L" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "user_train = {}\n", - "user_valid = {}\n", - "user_test = {}\n", - "\n", - "for uid, seq in sorted_iid_dict.items():\n", - " nfeedback = len(seq)\n", - " if nfeedback < 3:\n", - " user_train[uid] = seq\n", - " user_valid[uid] = []\n", - " user_test[uid] = []\n", - " else:\n", - " user_train[uid] = seq[:-2]\n", - " user_valid[uid] = []\n", - " user_valid[uid].append(seq[-2])\n", - " user_test[uid] = []\n", - " user_test[uid].append(seq[-1])\n", - "\n", - "usernum = merged['uid'].nunique()\n", - "itemnum = merged['iid'].nunique()\n", - "dataset = [user_train, user_valid, user_test, usernum, itemnum]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "0hv7KVP3cj7M" - } - }, - { - "cell_type": "markdown", - "source": [ - "![image.png]()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "1X1cXEHCcj7M" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "print(f\"유저 최소값 : {min(user_train.keys())}\")\n", - "print(f\"유저 최대값 : {max(user_train.keys()):,}\")\n", - "print(f\"유저 수 : {len(user_train):,}\")" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "Nc5Lt6u5cj7T" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "num_batch = len(user_train) // args.batch_size # tail? + ((len(user_train) % args.batch_size) != 0)\n", - "cc = 0.0\n", - "for u in user_train:\n", - " cc += len(user_train[u])\n", - "\n", - "print(f\"len(user_train) : {len(user_train)}\")\n", - "print(f\"args.batch_size : {args.batch_size}\")\n", - "print(f\"num_batch : {num_batch}\")\n", - "print(f\"average sequence length : {(cc / len(user_train)):.2f}\")" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "SO20NHdWcj7T" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "f = open(os.path.join(args.dataset + '_' + args.train_dir, 'log.txt'), 'w')\n", - "print(f\"logging 위치 : {os.path.join(args.dataset + '_' + args.train_dir, 'log.txt')}\")" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "Bo67TKhdcj7T" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "sampler = WarpSampler(user_train, usernum, itemnum, batch_size=args.batch_size, maxlen=args.maxlen, n_workers=3)\n", - "model = SASRec(usernum, itemnum, args).to(args.device) # no ReLU activation in original SASRec implementation?" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "kWBc0AS7cj7T" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# model = SASRec(10, 100, args).to(args.device) # no ReLU activation in original SASRec implementation?" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "8_VWit6jcj7T" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "epoch_start_idx = 1\n", - "if args.state_dict_path is not None:\n", - " try:\n", - " model.load_state_dict(torch.load(args.state_dict_path, map_location=torch.device(args.device)))\n", - " tail = args.state_dict_path[args.state_dict_path.find('epoch=') + 6:]\n", - " epoch_start_idx = int(tail[:tail.find('.')]) + 1\n", - " except: # in case your pytorch version is not 1.6 etc., pls debug by pdb if load weights failed\n", - " print('failed loading state_dicts, pls check file path: ', end=\"\")\n", - " print(args.state_dict_path)\n", - " print('pdb enabled for your quick check, pls type exit() if you do not need it')\n", - " import pdb; pdb.set_trace()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "VcbpaC3scj7T" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "if args.inference_only:\n", - " model.eval()\n", - " t_test = evaluate(model, dataset, args)\n", - " print('test (NDCG@10: %.4f, HR@10: %.4f)' % (t_test[0], t_test[1]))" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "d-bNgUWscj7U" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# ce_criterion = torch.nn.CrossEntropyLoss()\n", - "# https://github.com/NVIDIA/pix2pixHD/issues/9 how could an old bug appear again...\n", - "bce_criterion = torch.nn.BCEWithLogitsLoss() # torch.nn.BCELoss()\n", - "adam_optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.98))\n", - "\n", - "T = 0.0\n", - "t0 = time.time()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "-V2TDjcycj7U" - } - }, - { - "cell_type": "markdown", - "source": [ - "# Wandb 설정" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "1KYQr-0ccj7U" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "pip install wandb" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "v1lHYaLdcj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import wandb" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "lUz--E1lcj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "wandb.login()" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "ximTBLEfcj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "model_name = 'SASRec'" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "eXXlkKrPcj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# 🐝 1️⃣ Start a new run to track this script\n", - "wandb.init(\n", - " # Set the project where this run will be logged\n", - " project=\"recsys_key_papers_implementation\",\n", - " # We pass a run name (otherwise it’ll be randomly assigned, like sunshine-lollypop-10)\n", - " name=f\"{model_name}\",\n", - " # Track hyperparameters and run metadata\n", - " config=vars(args))" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "jmUSyeckcj7W" - } - }, - { - "cell_type": "markdown", - "source": [ - "# 학습" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - }, - "id": "Acjs3F3ocj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import copy\n", - "import random\n", - "import numpy as np" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "9OMLh3M2cj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "def evaluate(model, dataset, args):\n", - " [train, valid, test, usernum, itemnum] = copy.deepcopy(dataset)\n", - "\n", - " NDCG = 0.0\n", - " HT = 0.0\n", - " valid_user = 0.0\n", - "\n", - " if usernum > 10000:\n", - " users = random.sample(range(usernum), 10000)\n", - " else:\n", - " users = range(usernum)\n", - " for u in users:\n", - "\n", - " if len(train[u]) < 1 or len(test[u]) < 1: continue\n", - "\n", - " seq = np.zeros([args.maxlen], dtype=np.int32)\n", - " idx = args.maxlen - 1\n", - " seq[idx] = valid[u][0]\n", - " idx -= 1\n", - " for i in reversed(train[u]):\n", - " seq[idx] = i\n", - " idx -= 1\n", - " if idx == -1: break\n", - " rated = set(train[u])\n", - " rated.add(0)\n", - " item_idx = [test[u][0]]\n", - " for _ in range(100):\n", - " t = np.random.randint(1, itemnum + 1)\n", - " while t in rated: t = np.random.randint(1, itemnum + 1)\n", - " item_idx.append(t)\n", - "\n", - " try:\n", - " predictions = -model.predict(*[np.array(l) for l in [[seq], item_idx]])\n", - " except TypeError as e:\n", - " print([np.array(l) for l in [[u], [seq], item_idx]])\n", - " raise(e)\n", - " predictions = predictions[0] # - for 1st argsort DESC\n", - "\n", - " rank = predictions.argsort().argsort()[0].item()\n", - "\n", - " valid_user += 1\n", - "\n", - " if rank < 10:\n", - " NDCG += 1 / np.log2(rank + 2)\n", - " HT += 1\n", - " if valid_user % 1000 == 0:\n", - " print('.', end=\"\")\n", - " sys.stdout.flush()\n", - "\n", - " return NDCG / valid_user, HT / valid_user" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "QtHS_JNTcj7W" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "def evaluate_valid(model, dataset, args):\n", - " [train, valid, test, usernum, itemnum] = copy.deepcopy(dataset)\n", - "\n", - " NDCG = 0.0\n", - " valid_user = 0.0\n", - " HT = 0.0\n", - " if usernum > 10000:\n", - " users = random.sample(range(usernum), 10000)\n", - " else:\n", - " users = range(usernum)\n", - " for u in users:\n", - " if len(train[u]) < 1 or len(valid[u]) < 1: continue\n", - "\n", - " seq = np.zeros([args.maxlen], dtype=np.int32)\n", - " idx = args.maxlen - 1\n", - " for i in reversed(train[u]):\n", - " seq[idx] = i\n", - " idx -= 1\n", - " if idx == -1: break\n", - "\n", - " rated = set(train[u])\n", - " rated.add(0)\n", - " item_idx = [valid[u][0]]\n", - " for _ in range(100):\n", - " t = np.random.randint(1, itemnum + 1)\n", - " while t in rated: t = np.random.randint(1, itemnum + 1)\n", - " item_idx.append(t)\n", - "\n", - " predictions = -model.predict(*[np.array(l) for l in [[seq], item_idx]])\n", - " predictions = predictions[0]\n", - "\n", - " rank = predictions.argsort().argsort()[0].item()\n", - "\n", - " valid_user += 1\n", - "\n", - " if rank < 10:\n", - " NDCG += 1 / np.log2(rank + 2)\n", - " HT += 1\n", - " if valid_user % 100 == 0:\n", - " print('.', end=\"\")\n", - " sys.stdout.flush()\n", - "\n", - " return NDCG / valid_user, HT / valid_user" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "qc-QrLRAcj7X" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "best_ndcg = 0\n", - "for epoch in range(epoch_start_idx, args.num_epochs + 1):\n", - " if args.inference_only: break # just to decrease identition\n", - " for step in range(num_batch): # tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):\n", - " u, seq, pos, neg = sampler.next_batch() # tuples to ndarray\n", - " # dataset, dataloader\n", - " u, seq, pos, neg = np.array(u), np.array(seq), np.array(pos), np.array(neg)\n", - " pos_logits, neg_logits = model(u, seq, pos, neg)\n", - " pos_labels, neg_labels = torch.ones(pos_logits.shape, device=args.device), torch.zeros(neg_logits.shape, device=args.device)\n", - " # print(\"\\neye ball check raw_logits:\"); print(pos_logits); print(neg_logits) # check pos_logits > 0, neg_logits < 0\n", - " adam_optimizer.zero_grad()\n", - " indices = np.where(pos != 0)\n", - " loss = bce_criterion(pos_logits[indices], pos_labels[indices])\n", - " loss += bce_criterion(neg_logits[indices], neg_labels[indices])\n", - " for param in model.item_emb.parameters(): loss += args.l2_emb * torch.norm(param)\n", - " loss.backward()\n", - " adam_optimizer.step()\n", - " # print(\"loss in epoch {} iteration {}: {}\".format(epoch, step, loss.item())) # expected 0.4~0.6 after init few epochs\n", - "\n", - "\n", - " # if epoch % 20 == 0:\n", - " model.eval()\n", - " t1 = time.time() - t0\n", - " T += t1\n", - " print('Evaluating', end='')\n", - " t_test = evaluate(model, dataset, args)\n", - " t_valid = evaluate_valid(model, dataset, args)\n", - " print(f\"epoch:{epoch:4}, time: {T//3600:.0f} h {T//60:.0f} min {T%60:.0f} sec, valid (NDCG@10: {t_valid[0]:.4f}, HR@10: {t_valid[1]:.4f}), test (NDCG@10: {t_test[0]:.4f}, HR@10: {t_test[1]:.4f})\")\n", - " # wandb.log({\"epoch\": epoch, \"loss\": loss.item(), \"valid NDCG@10\" : t_valid[0], \"valid HR@10\" : t_valid[1], \"test NDCG@10\" : t_test[0], \"test HR@10\" : t_test[1]})\n", - " f.write(str(t_valid) + ' ' + str(t_test) + '\\n')\n", - " f.flush()\n", - " t0 = time.time()\n", - " model.train()\n", - "\n", - "\n", - " if t_valid[0] > best_ndcg:\n", - "\n", - " folder = args.train_dir\n", - " fname = f'SASRec_epoch_{int(epoch)}.pth'\n", - " torch.save(model.state_dict(), os.path.join(args.dataset + '_' + args.train_dir, fname))\n", - "\n", - " best_ndcg = t_valid[0]" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "Ays1pqRJcj7X" - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "args.train_dir" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "9IV2qU08eUoA" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "kJ71pBElZAMw" - }, - "outputs": [], - "source": [ - "fname" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "YTOEme16ZAMx" - }, - "outputs": [], - "source": [ - "[train, valid, test, usernum, itemnum] = copy.deepcopy(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "mCTaT0q6ZAMx" - }, - "outputs": [], - "source": [ - "len(valid)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "JY8uWnbQZAMx" - }, - "outputs": [], - "source": [ - "len(train)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "XIlB-PsaZAMx" - }, - "outputs": [], - "source": [ - "train[23783]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "idx" - ], - "metadata": { - "pycharm": { - "name": "#%%\n" - }, - "id": "RMKHCMydZAMy" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "elapsed": 6, - "status": "ok", - "timestamp": 1691554320161, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - }, - "user_tz": -540 - }, - "id": "36yGpn0bfken", - "outputId": "e6ef1899-3cec-440b-abaa-b4c603c669fd", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0], dtype=int32)" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UUYDUUSzfkaY", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "idx = args.maxlen - 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "elapsed": 6, - "status": "ok", - "timestamp": 1691554334176, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - }, - "user_tz": -540 - }, - "id": "mEytZLvffkW0", - "outputId": "52c70557-16a8-4be5-89ca-7a419262e4a8", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "49" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "idx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VxkJR8u9ft2A", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "seq[idx] = valid[u][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "elapsed": 3, - "status": "ok", - "timestamp": 1691554352527, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - }, - "user_tz": -540 - }, - "id": "cDF3kk6hftyb", - "outputId": "7a97a1b4-44fd-4995-ff4e-342b68ddb7dd", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78],\n", - " dtype=int32)" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OxApLIHMftu6", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "idx -= 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1ymZyBDbf2OG", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "train[u]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JXR-RkS8ftrY", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "for i in reversed(train[u]):\n", - " seq[idx] = i\n", - " idx -= 1\n", - " if idx == -1: break" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "elapsed": 3, - "status": "ok", - "timestamp": 1691554369744, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - }, - "user_tz": -540 - }, - "id": "y8TpzgyeftoG", - "outputId": "30470ebf-3824-4dbe-9122-b90452edbf66", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,\n", - " 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,\n", - " 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78],\n", - " dtype=int32)" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "q5eTR4fFftkC", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "rated = set(train[u])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ifhJCb9Iftfh", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "rated" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CoFAVq8mfkTY", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "rated.add(0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "v4uAqf8KfkPh", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "item_idx = [test[u][0]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "elapsed": 3, - "status": "ok", - "timestamp": 1691554427589, - "user": { - "displayName": "KYEONGCHAN LEE", - "userId": "03106579917275952793" - }, - "user_tz": -540 - }, - "id": "EfZENwm6fjnH", - "outputId": "9720051c-5856-4f3b-e1f2-856f1e7362b1", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[79]" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "item_idx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "flN9FCy1ce0H", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "for _ in range(100):\n", - " t = np.random.randint(1, itemnum + 1)\n", - " while t in rated: t = np.random.randint(1, itemnum + 1)\n", - " item_idx.append(t)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "n6Lco1xWgNQu", - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bTBOLLklgDSc", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "item_idx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VZcaSWwEgDYx", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OINRXapegDcs", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ewNkZWFEgDgJ", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Yr_MJ2dLgDjx", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SQYsGfPfgDnL", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "V2bQJD8FgDqw", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "background_save": true - }, - "id": "3t9iqc31gDuw", - "pycharm": { - "name": "#%%\n" - }, - "outputId": "65e4a0a4-2732-46e4-9b68-653d31662abe" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR:root:Internal Python error in the inspect module.\n", - "Below is the traceback from this internal error.\n", - "\n", - "ERROR:root:Internal Python error in the inspect module.\n", - "Below is the traceback from this internal error.\n", - "\n", - "ERROR:root:Internal Python error in the inspect module.\n", - "Below is the traceback from this internal error.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n", - " File \"\", line 1, in \n", - " torch.LongTensor(positions, device=model.dev)\n", - "RuntimeError: legacy constructor expects device type: cpu but device type: cuda was passed\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", - " stb = value._render_traceback_()\n", - "AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n", - " return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n", - " return f(*args, **kwargs)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n", - " records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n", - " File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n", - " frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n", - " filename = getsourcefile(frame) or getfile(frame)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n", - " module = getmodule(object, filename)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 861, in getmodule\n", - " file = getabsfile(object, _filename)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 845, in getabsfile\n", - " return os.path.normcase(os.path.abspath(_filename))\n", - " File \"/usr/lib/python3.10/posixpath.py\", line 384, in abspath\n", - " cwd = os.getcwd()\n", - "OSError: [Errno 107] Transport endpoint is not connected\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n", - " File \"\", line 1, in \n", - " torch.LongTensor(positions, device=model.dev)\n", - "RuntimeError: legacy constructor expects device type: cpu but device type: cuda was passed\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", - " stb = value._render_traceback_()\n", - "AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n", - " if (await self.run_code(code, result, async_=asy)):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3575, in run_code\n", - " self.showtraceback(running_compiled_code=True)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2101, in showtraceback\n", - " stb = self.InteractiveTB.structured_traceback(etype,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1367, in structured_traceback\n", - " return FormattedTB.structured_traceback(\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1267, in structured_traceback\n", - " return VerboseTB.structured_traceback(\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1124, in structured_traceback\n", - " formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1082, in format_exception_as_a_whole\n", - " last_unique, recursion_repeat = find_recursion(orig_etype, evalue, records)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 382, in find_recursion\n", - " return len(records), 0\n", - "TypeError: object of type 'NoneType' has no len()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", - " stb = value._render_traceback_()\n", - "AttributeError: 'TypeError' object has no attribute '_render_traceback_'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n", - " return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n", - " return f(*args, **kwargs)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n", - " records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n", - " File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n", - " frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n", - " filename = getsourcefile(frame) or getfile(frame)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n", - " module = getmodule(object, filename)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 861, in getmodule\n", - " file = getabsfile(object, _filename)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 845, in getabsfile\n", - " return os.path.normcase(os.path.abspath(_filename))\n", - " File \"/usr/lib/python3.10/posixpath.py\", line 384, in abspath\n", - " cwd = os.getcwd()\n", - "OSError: [Errno 107] Transport endpoint is not connected\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n", - " File \"\", line 1, in \n", - " torch.LongTensor(positions, device=model.dev)\n", - "RuntimeError: legacy constructor expects device type: cpu but device type: cuda was passed\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", - " stb = value._render_traceback_()\n", - "AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n", - " if (await self.run_code(code, result, async_=asy)):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3575, in run_code\n", - " self.showtraceback(running_compiled_code=True)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2101, in showtraceback\n", - " stb = self.InteractiveTB.structured_traceback(etype,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1367, in structured_traceback\n", - " return FormattedTB.structured_traceback(\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1267, in structured_traceback\n", - " return VerboseTB.structured_traceback(\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1124, in structured_traceback\n", - " formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1082, in format_exception_as_a_whole\n", - " last_unique, recursion_repeat = find_recursion(orig_etype, evalue, records)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 382, in find_recursion\n", - " return len(records), 0\n", - "TypeError: object of type 'NoneType' has no len()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", - " stb = value._render_traceback_()\n", - "AttributeError: 'TypeError' object has no attribute '_render_traceback_'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3030, in _run_cell\n", - " return runner(coro)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py\", line 78, in _pseudo_sync_runner\n", - " coro.send(None)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3257, in run_cell_async\n", - " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3492, in run_ast_nodes\n", - " self.showtraceback()\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2101, in showtraceback\n", - " stb = self.InteractiveTB.structured_traceback(etype,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1367, in structured_traceback\n", - " return FormattedTB.structured_traceback(\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1267, in structured_traceback\n", - " return VerboseTB.structured_traceback(\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1142, in structured_traceback\n", - " formatted_exceptions += self.format_exception_as_a_whole(etype, evalue, etb, lines_of_context,\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1082, in format_exception_as_a_whole\n", - " last_unique, recursion_repeat = find_recursion(orig_etype, evalue, records)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 382, in find_recursion\n", - " return len(records), 0\n", - "TypeError: object of type 'NoneType' has no len()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", - " stb = value._render_traceback_()\n", - "AttributeError: 'TypeError' object has no attribute '_render_traceback_'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n", - " return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n", - " return f(*args, **kwargs)\n", - " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n", - " records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n", - " File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n", - " frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n", - " filename = getsourcefile(frame) or getfile(frame)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n", - " module = getmodule(object, filename)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 861, in getmodule\n", - " file = getabsfile(object, _filename)\n", - " File \"/usr/lib/python3.10/inspect.py\", line 845, in getabsfile\n", - " return os.path.normcase(os.path.abspath(_filename))\n", - " File \"/usr/lib/python3.10/posixpath.py\", line 384, in abspath\n", - " cwd = os.getcwd()\n", - "OSError: [Errno 107] Transport endpoint is not connected\n" - ] - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lJETHzwigDxy", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kgzKNZfYgD1m", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tf4_A2JmgD5c", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_8phDK1ugD9r", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YSVy5mX-gEBA", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HX5QFhPJgEEZ", - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.9" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file +{"cells":[{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting pymysql\n"," Downloading PyMySQL-1.1.0-py3-none-any.whl (44 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.8/44.8 kB\u001b[0m \u001b[31m756.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: pymysql\n","Successfully installed pymysql-1.1.0\n"]}],"source":["pip install pymysql"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"NKDyPANDcj7D","executionInfo":{"status":"ok","timestamp":1704095209618,"user_tz":-540,"elapsed":8701,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}},"outputId":"54d454bc-6686-44fa-cb75-901b79591553","colab":{"base_uri":"https://localhost:8080/"}}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["import pymysql"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"WVE63KBbcj7D"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["endpoint = \"pseudorec.cvhv2t0obyv3.ap-northeast-2.rds.amazonaws.com\"\n","port = 3306\n","user = \"admin\"\n","region = \"ap-northeast-2c\"\n","dbname = \"movielens25m\"\n","passwd = 'Precsys1!'\n","\n","# connection = pymysql.connect(host=endpoint, user=user, passwd=passwd, port=port,\n","# database=dbname)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"xZRkRRYucj7E"}},{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"pycharm":{"name":"#%%\n"},"colab":{"base_uri":"https://localhost:8080/"},"id":"nwzAtR3Ecj7E","executionInfo":{"status":"ok","timestamp":1704096023877,"user_tz":-540,"elapsed":24817,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}},"outputId":"b9af564e-9c19-4963-d3c8-b1698641760f"}},{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'/content'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":5}],"source":["import os\n","os.getcwd()"],"metadata":{"pycharm":{"name":"#%%\n"},"colab":{"base_uri":"https://localhost:8080/","height":35},"id":"jLYz0Wx9cj7E","executionInfo":{"status":"ok","timestamp":1704096023878,"user_tz":-540,"elapsed":21,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}},"outputId":"644f47b5-f0f0-4ccf-a0f5-c48e36b82fea"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["import os\n","os.chdir('/content/drive/MyDrive/000GithubRepos/recsys_key_papers_implementation/SASRec')"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"uF4rBkGgcj7E"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["import pandas as pd"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"wqdPtiQucj7E"}},{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['/content',\n"," '/env/python',\n"," '/usr/lib/python310.zip',\n"," '/usr/lib/python3.10',\n"," '/usr/lib/python3.10/lib-dynload',\n"," '',\n"," '/usr/local/lib/python3.10/dist-packages',\n"," '/usr/lib/python3/dist-packages',\n"," '/usr/local/lib/python3.10/dist-packages/IPython/extensions',\n"," '/root/.ipython']"]},"metadata":{},"execution_count":8}],"source":["import sys\n","sys.path"],"metadata":{"pycharm":{"name":"#%%\n"},"colab":{"base_uri":"https://localhost:8080/"},"id":"e7emrLYncj7E","executionInfo":{"status":"ok","timestamp":1704096028671,"user_tz":-540,"elapsed":6,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}},"outputId":"af4f1390-5a07-479e-b75c-1b2bedc412c1"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["sys.path.append('../')"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"QNnGfwwmcj7E"}},{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting boto3\n"," Downloading boto3-1.34.11-py3-none-any.whl (139 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.3/139.3 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting botocore<1.35.0,>=1.34.11 (from boto3)\n"," Downloading botocore-1.34.11-py3-none-any.whl (11.9 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.9/11.9 MB\u001b[0m \u001b[31m24.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3)\n"," Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n","Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)\n"," Downloading s3transfer-0.10.0-py3-none-any.whl (82 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.1/82.1 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.11->boto3) (2.8.2)\n","Requirement already satisfied: urllib3<2.1,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.11->boto3) (2.0.7)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.35.0,>=1.34.11->boto3) (1.16.0)\n","Installing collected packages: jmespath, botocore, s3transfer, boto3\n","Successfully installed boto3-1.34.11 botocore-1.34.11 jmespath-1.0.1 s3transfer-0.10.0\n"]}],"source":["pip install boto3"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"tqYg8k9_cj7E","outputId":"52e9938c-8aa9-44fc-d450-2584ddf2543a","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1704096043391,"user_tz":-540,"elapsed":9985,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}}}},{"cell_type":"markdown","source":["# 데이터 불러오기"],"metadata":{"collapsed":false,"pycharm":{"name":"#%% md\n"},"id":"KV0gagCTcj7E"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["import os\n","\n","import boto3\n","import pandas as pd\n","import pymysql\n","from boto3.dynamodb.conditions import Key\n","# from dotenv import load_dotenv\n","\n","# load_dotenv()\n","\n","class MysqlClient:\n"," def __init__(self):\n"," self.endpoint = \"pseudorec.cvhv2t0obyv3.ap-northeast-2.rds.amazonaws.com\"\n"," self.port = 3306\n"," self.user = \"admin\"\n"," self.region = \"ap-northeast-2c\"\n"," self.dbname = \"movielens25m\"\n"," self.passwd = 'Precsys1!'\n"," os.environ['LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN'] = '1'\n"," # self.connection = pymysql.connect(host=endpoint, user=user, passwd=passwd, port=port, database=dbname)\n","\n"," def get_connection(self):\n"," connection = pymysql.connect(host=self.endpoint, user=self.user, passwd=self.passwd, port=self.port,\n"," database=self.dbname)\n"," return connection\n","\n"," def get_count(self, table_name):\n"," with self.get_connection().cursor() as cursor:\n"," cursor.execute(f\"select count(*) from {table_name}\")\n"," return cursor.fetchall()[0][0]\n","\n"," def get_movies(self):\n"," with self.get_connection() as connection:\n"," df = pd.read_sql(sql='select * from movies', con=connection)\n"," return df\n","\n"," def get_daum_movies(self):\n"," with self.get_connection() as connection:\n"," df = pd.read_sql(sql='select * from daum_movies', con=connection)\n"," return df\n","\n"," def get_daum_ratings(self):\n"," with self.get_connection() as connection:\n"," df = pd.read_sql(sql='select * from daum_ratings', con=connection)\n"," return df\n","\n"," def get_url(self, title):\n"," with self.get_connection() as connection:\n"," cursor = connection.cursor()\n"," cursor.execute(f\"\"\"\n"," select url from movies where title = '{title}'\n"," \"\"\")\n"," url = cursor.fetchall()[0][0]\n"," return url\n","\n"," def get_table_names(self):\n"," print(\"Tables : \")\n"," with self.get_connection().cursor() as cursor:\n"," sql = \"SHOW TABLES\"\n"," cursor.execute(sql)\n"," result = cursor.fetchall()\n"," for row in result:\n"," print(row[0])\n","\n"," def get_data_type(self, table_name):\n"," with self.get_connection().cursor() as cursor:\n"," cursor.execute(f\"SHOW COLUMNS FROM {table_name}\")\n"," columns = cursor.fetchall()\n"," for column in columns:\n"," column_name = column[0]\n"," data_type = column[1]\n"," print(f\"Column: {column_name}, Data Type: {data_type}\")\n","\n","\n","class DynamoDB:\n"," def __init__(self, table_name: str):\n"," self.resource = boto3.resource(\n"," 'dynamodb',\n"," aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],\n"," aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],\n"," region_name=os.environ['AWS_REGION_NAME'],\n"," )\n","\n"," self.client = boto3.client(\n"," 'dynamodb',\n"," aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],\n"," aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],\n"," region_name=os.environ['AWS_REGION_NAME'],\n"," )\n"," self.table = self.resource.Table(table_name) # clicklog 테이블 등으로 연결\n","\n"," def put_item(self, click_log):\n"," resp = self.table.put_item(Item=click_log)\n","\n"," def get_a_user_logs(self, user_name: str):\n"," query = {\"KeyConditionExpression\": Key(\"userId\").eq(user_name)}\n"," resp = self.table.query(**query)\n"," return pd.DataFrame(resp['Items'])\n"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"akF5br0Dcj7E"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["mysql = MysqlClient()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"w7_oitsecj7E"}},{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":[":39: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n"," df = pd.read_sql(sql='select * from daum_movies', con=connection)\n"]}],"source":["daum_movies = mysql.get_daum_movies()"],"metadata":{"pycharm":{"name":"#%%\n"},"colab":{"base_uri":"https://localhost:8080/"},"id":"z4smMQdpcj7E","executionInfo":{"status":"ok","timestamp":1704096244710,"user_tz":-540,"elapsed":1068,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}},"outputId":"7c9dc9d2-de55-4259-bf1d-f83e62f8954f"}},{"cell_type":"markdown","source":["# 테이블 총 개수"],"metadata":{"collapsed":false,"pycharm":{"name":"#%% md\n"},"id":"LLdujZ3icj7G"}},{"cell_type":"code","execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":[":44: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n"," df = pd.read_sql(sql='select * from daum_ratings', con=connection)\n"]}],"source":["daum_ratings = mysql.get_daum_ratings()"],"metadata":{"pycharm":{"name":"#%%\n"},"colab":{"base_uri":"https://localhost:8080/"},"id":"FiIIbDCacj7G","executionInfo":{"status":"ok","timestamp":1704096269330,"user_tz":-540,"elapsed":5370,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"}},"outputId":"fd51ea43-3f0e-4307-be86-bfe29883f295"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["available_user = daum_ratings[daum_ratings['nickName'].map(lambda x: x not in ['휴면 사용자', '', '닉네임을 등록해 주세요', '닉네임'])]"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"dl1kyGkbM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["merged = pd.merge(left=available_user, right=daum_movies, how='left', on='movieId')[['nickName', 'movieId', 'titleKo','rating', 'timestamp', 'numOfSiteRatings']]"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"ip0yIMgeM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["merged"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"pQ3Dydq1M0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["average_ratings = merged.groupby('movieId')['rating'].mean().reset_index()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"L9oBM9_QM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["rating_mean_dict = dict(zip(average_ratings['movieId'], average_ratings['rating']))"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"T7y42sX-M0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["from collections import Counter\n","rating_num_dict = Counter(merged['movieId'])"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"QbsR1l9eM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["daum_movies['rating_mean'] = daum_movies['movieId'].map(rating_mean_dict)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"2JDHtBZDM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["daum_movies[daum_movies['movieId'] == 128434]"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"EY8e4iBfM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["daum_movies['num_of_collected_ratings'] = daum_movies['movieId'].map(rating_num_dict)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"OmBngzUGM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["daum_ratings[daum_ratings['movieId'] == 128434]['rating'].mean()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"nLzMqAmTM0GT"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["daum_movies[daum_movies['num_of_collected_ratings']>100].sort_values('rating_mean', ascending=False).head(30)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"hIMN_vg1M0GU"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["Counter(merged['titleKo']).most_common(10)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"IJjXwq3iM0GU"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# # movie_dictionary\n","# movies = pd.read_table('../data/ml-1m_grouplens/movies.dat', sep='::', header=None, names=['movie_id', 'title', 'genres'],\n","# engine='python', encoding_errors='ignore')\n","# movie_dict = movies.to_dict('index')"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"Mm7AiRe9M0GU"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# # movie_dictionary\n","# movies = pd.read_table('../data/ml-1m_grouplens/movies.dat', sep='::', header=None, names=['movie_id', 'title', 'genres'],\n","# engine='python', encoding_errors='ignore')\n","# movie_dict = movies.to_dict('index')"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"FrJPrPPGM0GU"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# ratings = pd.read_table('../data/ml-1m_grouplens/ratings.dat', sep='::', header=None, names=['userid', 'movieid', 'rating', 'timestamp'],\n","# engine='python', encoding_errors='ignore')"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"1zS_EUIhM0GU"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["merged.nunique()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"HjYzcvjKM0GU"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["user_id_dict = {nn:i for i, nn in enumerate(merged['nickName'].unique())}"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"4YIcfuofM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["movie_id_dict = {site_mid:i for i, site_mid in enumerate(merged['movieId'].unique())}"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"d0mHZk6bM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# uid, iid 부여\n","merged['uid'] = merged['nickName'].map(user_id_dict)\n","merged['iid'] = merged['movieId'].map(movie_id_dict)\n","merged"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"jayOzqrbM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["from datetime import datetime"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"dO9OiKPvM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["merged['dt'] = merged['timestamp'].map(lambda x: datetime.fromtimestamp(x))"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"szB3AkNDM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["merged"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"qznIxtPtM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["sorted_iid_lists = merged.sort_values(by=['uid', 'timestamp']).groupby('uid')['iid'].apply(list)\n","sorted_iid_lists"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"HSuJ3D9gM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["sorted_iid_dict = dict(sorted_iid_lists)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"4cPQDnwoM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# 시퀀스 평균 길이는?\n","import numpy as np\n","np.mean([len(seq) for uid, seq in sorted_iid_dict.items()])"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"glSfVP1KM0GW"}},{"cell_type":"markdown","source":["# 모델 준비"],"metadata":{"collapsed":false,"pycharm":{"name":"#%% md\n"},"id":"ev73tJx8M0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["import time\n","import torch\n","from torch import nn\n","import argparse\n","from model import SASRec\n","from utils import *"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"g4doBoMmM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["def str2bool(s):\n"," if s not in {'false', 'true'}:\n"," raise ValueError('Not a valid boolean string')\n"," return s == 'true'\n","\n","def get_args():\n"," parser = argparse.ArgumentParser()\n"," parser.add_argument('--ratings_dir', required=True)\n"," parser.add_argument('--model_output_dir', required=True)\n"," parser.add_argument('--batch_size', default=128, type=int)\n"," parser.add_argument('--lr', default=0.001, type=float)\n"," parser.add_argument('--maxlen', default=50, type=int)\n"," parser.add_argument('--hidden_units', default=50, type=int)\n"," parser.add_argument('--num_blocks', default=2, type=int)\n"," parser.add_argument('--num_epochs', default=200, type=int)\n"," parser.add_argument('--num_heads', default=1, type=int)\n"," parser.add_argument('--dropout_rate', default=0.5, type=float)\n"," parser.add_argument('--l2_emb', default=0.0, type=float)\n"," parser.add_argument('--device', default='cpu', type=str)\n"," parser.add_argument('--inference_only', default=False, type=str2bool)\n"," parser.add_argument('--state_dict_path', default=None, type=str)\n"," return parser.parse_args(args=['--ratings_dir', '../data/ml-1m_grouplens/ratings.dat', '--model_output_dir', 'model_output'])\n","args = get_args()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"PvitQyz3M0GW"}},{"cell_type":"markdown","source":["# 학습모델 저장 경로"],"metadata":{"collapsed":false,"pycharm":{"name":"#%% md\n"},"id":"zBMpinNCM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["print(f\"args.model_output_dir : {args.model_output_dir}\")\n","if not os.path.exists(args.model_output_dir):\n"," os.mkdir(args.model_output_dir)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"QW2Mv2dyM0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# [str(k) + ',' + str(v)\n","for k, v in sorted(vars(args).items(), key=lambda x: x[0]):\n"," print(f\"{k:30} : {str(v):20}\")"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"XZFpjnb0M0GW"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["args.dataset = 'daum'"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"hG1lNG9dM0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["args.train_dir = 'train_dir'"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"IYhd4-3yM0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["if not os.path.isdir(args.dataset + '_' + args.train_dir): # 데이터 없다면 폴더 만듦\n"," os.makedirs(args.dataset + '_' + args.train_dir)\n"," print(f\"made {args.dataset + '_' + args.train_dir} folder\")\n","with open(os.path.join(args.dataset + '_' + args.train_dir, 'args.txt'), 'w') as f: # argument 저장\n"," f.write('\\n'.join([str(k) + ',' + str(v) for k, v in sorted(vars(args).items(), key=lambda x: x[0])]))\n"," print(f\"wrote '{os.path.join(args.dataset + '_' + args.train_dir, 'args.txt')}'\")\n","f.close()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"bhYMV8X9M0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["from collections import defaultdict"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"qqUw9uPZM0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# f = open(args.ratings_dir, 'r')\n","# user_seqs = defaultdict(list)\n","# for line in f.readlines():\n","# data_lst = line.replace('\\n', '').split('::')\n","# if len(data_lst) < 4:\n","# continue\n","# userid, movieid, rating, timestamp = [int(data) for data in line.replace('\\n', '').split('::')]\n","# user_seqs[userid].append((movieid, timestamp))\n","\n","# f.close()"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"3SyTd-AbM0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# for userid in list(user_seqs.keys()):\n","# sorted_user_seq = sorted(user_seqs[userid], key=lambda x: x[1])\n","# sorted_user_seq = [movieid for movieid, timestamp in sorted_user_seq]\n","# user_seqs[userid] = sorted_user_seq"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"x3SJUCu8M0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["# from collections import defaultdict\n","\n","# # train/val/test data generation\n","# def data_partition(fname):\n","# usernum = 0\n","# itemnum = 0\n","# User = defaultdict(list)\n","# user_train = {}\n","# user_valid = {}\n","# user_test = {}\n","# # assume user/item index starting from 1\n","# f = open(f'{fname}')\n","# for line in f:\n","# u, i = line.rstrip().split(' ')\n","# u = int(u)\n","# i = int(i)\n","# usernum = max(u, usernum)\n","# itemnum = max(i, itemnum)\n","# User[u].append(i)\n","\n","# for user in User:\n","# nfeedback = len(User[user])\n","# if nfeedback < 3:\n","# user_train[user] = User[user]\n","# user_valid[user] = []\n","# user_test[user] = []\n","# else:\n","# user_train[user] = User[user][:-2]\n","# user_valid[user] = []\n","# user_valid[user].append(User[user][-2])\n","# user_test[user] = []\n","# user_test[user].append(User[user][-1])\n","# return [user_train, user_valid, user_test, usernum, itemnum]\n","\n","# dataset = data_partition(args.dataset)"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"QNafUM4oM0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["user_train = {}\n","user_valid = {}\n","user_test = {}\n","\n","for uid, seq in sorted_iid_dict.items():\n"," nfeedback = len(seq)\n"," if nfeedback < 3:\n"," user_train[uid] = seq\n"," user_valid[uid] = []\n"," user_test[uid] = []\n"," else:\n"," user_train[uid] = seq[:-2]\n"," user_valid[uid] = []\n"," user_valid[uid].append(seq[-2])\n"," user_test[uid] = []\n"," user_test[uid].append(seq[-1])\n","\n","usernum = merged['uid'].nunique()\n","itemnum = merged['iid'].nunique()\n","dataset = [user_train, user_valid, user_test, usernum, itemnum]"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"o5sj0ZPGM0GX"}},{"cell_type":"markdown","source":["![image.png]()"],"metadata":{"collapsed":false,"pycharm":{"name":"#%% md\n"},"id":"s1Pgi3nDM0GX"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["print(f\"유저 최소값 : {min(user_train.keys())}\")\n","print(f\"유저 최대값 : {max(user_train.keys()):,}\")\n","print(f\"유저 수 : {len(user_train):,}\")"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"hFXI_7UdM0Gb"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["num_batch = len(user_train) // args.batch_size # tail? + ((len(user_train) % args.batch_size) != 0)\n","cc = 0.0\n","for u in user_train:\n"," cc += len(user_train[u])\n","\n","print(f\"len(user_train) : {len(user_train)}\")\n","print(f\"args.batch_size : {args.batch_size}\")\n","print(f\"num_batch : {num_batch}\")\n","print(f\"average sequence length : {(cc / len(user_train)):.2f}\")"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"gZGrdcoDM0Gb"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["f = open(os.path.join(args.dataset + '_' + args.train_dir, 'log.txt'), 'w')\n","print(f\"logging 위치 : {os.path.join(args.dataset + '_' + args.train_dir, 'log.txt')}\")"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"WLpFvu-MM0Gb"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["sampler = WarpSampler(user_train, usernum, itemnum, batch_size=args.batch_size, maxlen=args.maxlen, n_workers=3)\n","model = SASRec(usernum, itemnum, args).to(args.device) # no ReLU activation in original SASRec implementation?"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"xsSaYmSuPQm8"}},{"cell_type":"code","execution_count":null,"outputs":[],"source":["fname"],"metadata":{"pycharm":{"name":"#%%\n"},"id":"dzmqkqC5PgFE"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"VxkJR8u9ft2A","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["seq[idx] = valid[u][0]"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1691554352527,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"},"user_tz":-540},"id":"cDF3kk6hftyb","outputId":"7a97a1b4-44fd-4995-ff4e-342b68ddb7dd","pycharm":{"name":"#%%\n"}},"outputs":[{"data":{"text/plain":["array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n"," 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n"," 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78],\n"," dtype=int32)"]},"execution_count":64,"metadata":{},"output_type":"execute_result"}],"source":["seq"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OxApLIHMftu6","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["idx -= 1"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1ymZyBDbf2OG","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["train[u]"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JXR-RkS8ftrY","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["for i in reversed(train[u]):\n"," seq[idx] = i\n"," idx -= 1\n"," if idx == -1: break"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":832,"status":"ok","timestamp":1704099390065,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"},"user_tz":-540},"id":"y8TpzgyeftoG","outputId":"d5dfe56e-61d9-46b3-8e60-2fa052b314a6","pycharm":{"name":"#%%\n"}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["[1131]"]},"metadata":{},"execution_count":74}],"source":["seq"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"q5eTR4fFftkC","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["rated = set(train[u])"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ifhJCb9Iftfh","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["rated"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CoFAVq8mfkTY","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["rated.add(0)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"v4uAqf8KfkPh","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["item_idx = [test[u][0]]"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1691554427589,"user":{"displayName":"KYEONGCHAN LEE","userId":"03106579917275952793"},"user_tz":-540},"id":"EfZENwm6fjnH","outputId":"9720051c-5856-4f3b-e1f2-856f1e7362b1","pycharm":{"name":"#%%\n"}},"outputs":[{"data":{"text/plain":["[79]"]},"execution_count":73,"metadata":{},"output_type":"execute_result"}],"source":["item_idx"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"flN9FCy1ce0H","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["for _ in range(100):\n"," t = np.random.randint(1, itemnum + 1)\n"," while t in rated: t = np.random.randint(1, itemnum + 1)\n"," item_idx.append(t)"]},{"cell_type":"markdown","metadata":{"id":"n6Lco1xWgNQu","pycharm":{"name":"#%% md\n"}},"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"bTBOLLklgDSc","pycharm":{"name":"#%%\n"}},"outputs":[],"source":["item_idx"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"VZcaSWwEgDYx","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OINRXapegDcs","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ewNkZWFEgDgJ","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Yr_MJ2dLgDjx","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"SQYsGfPfgDnL","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"V2bQJD8FgDqw","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"3t9iqc31gDuw","pycharm":{"name":"#%%\n"},"outputId":"65e4a0a4-2732-46e4-9b68-653d31662abe"},"outputs":[{"name":"stderr","output_type":"stream","text":["ERROR:root:Internal Python error in the inspect module.\n","Below is the traceback from this internal error.\n","\n","ERROR:root:Internal Python error in the inspect module.\n","Below is the traceback from this internal error.\n","\n","ERROR:root:Internal Python error in the inspect module.\n","Below is the traceback from this internal error.\n","\n"]},{"name":"stdout","output_type":"stream","text":["Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n"," exec(code_obj, self.user_global_ns, self.user_ns)\n"," File \"\", line 1, in \n"," torch.LongTensor(positions, device=model.dev)\n","RuntimeError: legacy constructor expects device type: cpu but device type: cuda was passed\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n"," stb = value._render_traceback_()\n","AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n"," return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n"," return f(*args, **kwargs)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n"," records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n"," File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n"," frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n"," File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n"," filename = getsourcefile(frame) or getfile(frame)\n"," File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n"," module = getmodule(object, filename)\n"," File \"/usr/lib/python3.10/inspect.py\", line 861, in getmodule\n"," file = getabsfile(object, _filename)\n"," File \"/usr/lib/python3.10/inspect.py\", line 845, in getabsfile\n"," return os.path.normcase(os.path.abspath(_filename))\n"," File \"/usr/lib/python3.10/posixpath.py\", line 384, in abspath\n"," cwd = os.getcwd()\n","OSError: [Errno 107] Transport endpoint is not connected\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n"," exec(code_obj, self.user_global_ns, self.user_ns)\n"," File \"\", line 1, in \n"," torch.LongTensor(positions, device=model.dev)\n","RuntimeError: legacy constructor expects device type: cpu but device type: cuda was passed\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n"," stb = value._render_traceback_()\n","AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n"," if (await self.run_code(code, result, async_=asy)):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3575, in run_code\n"," self.showtraceback(running_compiled_code=True)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2101, in showtraceback\n"," stb = self.InteractiveTB.structured_traceback(etype,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1367, in structured_traceback\n"," return FormattedTB.structured_traceback(\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1267, in structured_traceback\n"," return VerboseTB.structured_traceback(\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1124, in structured_traceback\n"," formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1082, in format_exception_as_a_whole\n"," last_unique, recursion_repeat = find_recursion(orig_etype, evalue, records)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 382, in find_recursion\n"," return len(records), 0\n","TypeError: object of type 'NoneType' has no len()\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n"," stb = value._render_traceback_()\n","AttributeError: 'TypeError' object has no attribute '_render_traceback_'\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n"," return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n"," return f(*args, **kwargs)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n"," records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n"," File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n"," frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n"," File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n"," filename = getsourcefile(frame) or getfile(frame)\n"," File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n"," module = getmodule(object, filename)\n"," File \"/usr/lib/python3.10/inspect.py\", line 861, in getmodule\n"," file = getabsfile(object, _filename)\n"," File \"/usr/lib/python3.10/inspect.py\", line 845, in getabsfile\n"," return os.path.normcase(os.path.abspath(_filename))\n"," File \"/usr/lib/python3.10/posixpath.py\", line 384, in abspath\n"," cwd = os.getcwd()\n","OSError: [Errno 107] Transport endpoint is not connected\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n"," exec(code_obj, self.user_global_ns, self.user_ns)\n"," File \"\", line 1, in \n"," torch.LongTensor(positions, device=model.dev)\n","RuntimeError: legacy constructor expects device type: cpu but device type: cuda was passed\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n"," stb = value._render_traceback_()\n","AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3473, in run_ast_nodes\n"," if (await self.run_code(code, result, async_=asy)):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3575, in run_code\n"," self.showtraceback(running_compiled_code=True)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2101, in showtraceback\n"," stb = self.InteractiveTB.structured_traceback(etype,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1367, in structured_traceback\n"," return FormattedTB.structured_traceback(\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1267, in structured_traceback\n"," return VerboseTB.structured_traceback(\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1124, in structured_traceback\n"," formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1082, in format_exception_as_a_whole\n"," last_unique, recursion_repeat = find_recursion(orig_etype, evalue, records)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 382, in find_recursion\n"," return len(records), 0\n","TypeError: object of type 'NoneType' has no len()\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n"," stb = value._render_traceback_()\n","AttributeError: 'TypeError' object has no attribute '_render_traceback_'\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3030, in _run_cell\n"," return runner(coro)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py\", line 78, in _pseudo_sync_runner\n"," coro.send(None)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3257, in run_cell_async\n"," has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3492, in run_ast_nodes\n"," self.showtraceback()\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2101, in showtraceback\n"," stb = self.InteractiveTB.structured_traceback(etype,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1367, in structured_traceback\n"," return FormattedTB.structured_traceback(\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1267, in structured_traceback\n"," return VerboseTB.structured_traceback(\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1142, in structured_traceback\n"," formatted_exceptions += self.format_exception_as_a_whole(etype, evalue, etb, lines_of_context,\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1082, in format_exception_as_a_whole\n"," last_unique, recursion_repeat = find_recursion(orig_etype, evalue, records)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 382, in find_recursion\n"," return len(records), 0\n","TypeError: object of type 'NoneType' has no len()\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n"," stb = value._render_traceback_()\n","AttributeError: 'TypeError' object has no attribute '_render_traceback_'\n","\n","During handling of the above exception, another exception occurred:\n","\n","Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n"," return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n"," return f(*args, **kwargs)\n"," File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n"," records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n"," File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n"," frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n"," File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n"," filename = getsourcefile(frame) or getfile(frame)\n"," File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n"," module = getmodule(object, filename)\n"," File \"/usr/lib/python3.10/inspect.py\", line 861, in getmodule\n"," file = getabsfile(object, _filename)\n"," File \"/usr/lib/python3.10/inspect.py\", line 845, in getabsfile\n"," return os.path.normcase(os.path.abspath(_filename))\n"," File \"/usr/lib/python3.10/posixpath.py\", line 384, in abspath\n"," cwd = os.getcwd()\n","OSError: [Errno 107] Transport endpoint is not connected\n"]}],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lJETHzwigDxy","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"kgzKNZfYgD1m","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"tf4_A2JmgD5c","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"_8phDK1ugD9r","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"YSVy5mX-gEBA","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"HX5QFhPJgEEZ","pycharm":{"name":"#%%\n"}},"outputs":[],"source":[]}],"metadata":{"accelerator":"GPU","colab":{"provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.9"}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file