Skip to content

Commit

Permalink
Merge branch 'main' into sf100
Browse files Browse the repository at this point in the history
  • Loading branch information
qishipengqsp committed Aug 23, 2024
2 parents 6c7f94b + d05e55d commit 262ca25
Show file tree
Hide file tree
Showing 33 changed files with 216 additions and 168 deletions.
1 change: 1 addition & 0 deletions scripts/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def run_local(
**({'spark.serializer': 'org.apache.spark.serializer.KryoSerializer'}),
**({'spark.executor.extraJavaOptions': '-XX:+UseG1GC'}),
**({'spark.driver.maxResultSize': '5g'}),
# **({'spark.driver.extraJavaOptions': '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005'}), # Debug
# **({'spark.executor.extraJavaOptions': '-verbose:gc -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps'}),
**spark_conf
}
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ OUTPUT_DIR=out

# For more command line arguments, see the main entry for more information at
# src/main/scala/ldbc/finbench/datagen/LdbcDatagen.scala
time python3 scripts/run.py --jar $LDBC_FINBENCH_DATAGEN_JAR --main-class ldbc.finbench.datagen.LdbcDatagen --memory 500g -- --scale-factor 30 --output-dir ${OUTPUT_DIR}
time python3 scripts/run.py --jar $LDBC_FINBENCH_DATAGEN_JAR --main-class ldbc.finbench.datagen.LdbcDatagen --memory 500g -- --scale-factor 1 --output-dir ${OUTPUT_DIR}
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,17 @@ public CompanyInvestCompany(Company fromCompany, Company toCompany,
this.comment = comment;
}

public static CompanyInvestCompany createCompanyInvestCompany(RandomGeneratorFarm farm,
Company fromCompany, Company toCompany) {
public static void createCompanyInvestCompany(RandomGeneratorFarm farm,
Company investor, Company target) {
Random dateRandom = farm.get(RandomGeneratorFarm.Aspect.COMPANY_INVEST_DATE);
long creationDate = Dictionaries.dates.randomCompanyToCompanyDate(dateRandom, fromCompany, toCompany);
long creationDate = Dictionaries.dates.randomCompanyToCompanyDate(dateRandom, investor, target);
double ratio = farm.get(RandomGeneratorFarm.Aspect.INVEST_RATIO).nextDouble();
String comment =
Dictionaries.randomTexts.getUniformDistRandomTextForComments(
farm.get(RandomGeneratorFarm.Aspect.COMMON_COMMENT));
CompanyInvestCompany companyInvestCompany =
new CompanyInvestCompany(fromCompany, toCompany, creationDate, 0, ratio, false, comment);
fromCompany.getCompanyInvestCompanies().add(companyInvestCompany);

return companyInvestCompany;
new CompanyInvestCompany(investor, target, creationDate, 0, ratio, false, comment);
target.getCompanyInvestCompanies().add(companyInvestCompany);
}

public void scaleRatio(double sum) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,17 @@ public PersonInvestCompany(Person person, Company company,
this.comment = comment;
}

public static PersonInvestCompany createPersonInvestCompany(RandomGeneratorFarm farm, Person person,
Company company) {
public static void createPersonInvestCompany(RandomGeneratorFarm farm, Person investor,
Company target) {
Random dateRandom = farm.get(RandomGeneratorFarm.Aspect.PERSON_INVEST_DATE);
long creationDate = Dictionaries.dates.randomPersonToCompanyDate(dateRandom, person, company);
long creationDate = Dictionaries.dates.randomPersonToCompanyDate(dateRandom, investor, target);
double ratio = farm.get(RandomGeneratorFarm.Aspect.INVEST_RATIO).nextDouble();
String comment =
Dictionaries.randomTexts.getUniformDistRandomTextForComments(
farm.get(RandomGeneratorFarm.Aspect.COMMON_COMMENT));
PersonInvestCompany personInvestCompany = new PersonInvestCompany(person, company, creationDate, 0, ratio,
PersonInvestCompany personInvestCompany = new PersonInvestCompany(investor, target, creationDate, 0, ratio,
false, comment);
person.getPersonInvestCompanies().add(personInvestCompany);

return personInvestCompany;
target.getPersonInvestCompanies().add(personInvestCompany);
}

public void scaleRatio(double sum) {
Expand Down
14 changes: 8 additions & 6 deletions src/main/java/ldbc/finbench/datagen/entities/edges/Transfer.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.util.Comparator;
import ldbc.finbench.datagen.entities.DynamicActivity;
import ldbc.finbench.datagen.entities.nodes.Account;
import ldbc.finbench.datagen.generation.DatagenParams;
import ldbc.finbench.datagen.generation.dictionary.Dictionaries;
import ldbc.finbench.datagen.util.RandomGeneratorFarm;

Expand Down Expand Up @@ -31,14 +32,15 @@ public Transfer(Account fromAccount, Account toAccount, double amount, long crea
this.isExplicitlyDeleted = isExplicitlyDeleted;
}

public static Transfer createTransferAndReturn(RandomGeneratorFarm farm, Account from, Account to,
long multiplicityId,
double amount) {
public static Transfer createTransfer(RandomGeneratorFarm farm, Account from, Account to,
long multiplicityId) {
long deleteDate = Math.min(from.getDeletionDate(), to.getDeletionDate());
long creationDate =
Dictionaries.dates.randomAccountToAccountDate(farm.get(RandomGeneratorFarm.Aspect.TRANSFER_DATE), from, to,
deleteDate);
boolean willDelete = from.isExplicitlyDeleted() && to.isExplicitlyDeleted();
double amount =
farm.get(RandomGeneratorFarm.Aspect.TRANSFER_AMOUNT).nextDouble() * DatagenParams.transferMaxAmount;
Transfer transfer = new Transfer(from, to, amount, creationDate, deleteDate, multiplicityId, willDelete);

// Set ordernum
Expand Down Expand Up @@ -67,9 +69,9 @@ public static Transfer createTransferAndReturn(RandomGeneratorFarm farm, Account
return transfer;
}

public static Transfer createLoanTransferAndReturn(RandomGeneratorFarm farm, Account from, Account to,
long multiplicityId,
double amount) {
public static Transfer createLoanTransfer(RandomGeneratorFarm farm, Account from, Account to,
long multiplicityId,
double amount) {
long deleteDate = Math.min(from.getDeletionDate(), to.getDeletionDate());
long creationDate =
Dictionaries.dates.randomAccountToAccountDate(farm.get(RandomGeneratorFarm.Aspect.LOAN_SUBEVENTS_DATE),
Expand Down
44 changes: 44 additions & 0 deletions src/main/java/ldbc/finbench/datagen/entities/nodes/Company.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import ldbc.finbench.datagen.entities.edges.CompanyGuaranteeCompany;
import ldbc.finbench.datagen.entities.edges.CompanyInvestCompany;
import ldbc.finbench.datagen.entities.edges.CompanyOwnAccount;
import ldbc.finbench.datagen.entities.edges.PersonInvestCompany;
import ldbc.finbench.datagen.generation.dictionary.Dictionaries;

public class Company implements Serializable {
Expand All @@ -22,13 +23,17 @@ public class Company implements Serializable {
private String description;
private String url;
private final List<CompanyOwnAccount> companyOwnAccounts;
// invested by persons
private final List<PersonInvestCompany> personInvestCompanies;
// invested by companies
private final List<CompanyInvestCompany> companyInvestCompanies;
private final LinkedHashSet<CompanyGuaranteeCompany> guaranteeSrc;
private final LinkedHashSet<CompanyGuaranteeCompany> guaranteeDst;
private final List<CompanyApplyLoan> companyApplyLoans;

public Company() {
companyOwnAccounts = new LinkedList<>();
personInvestCompanies = new LinkedList<>();
companyInvestCompanies = new LinkedList<>();
guaranteeSrc = new LinkedHashSet<>();
guaranteeDst = new LinkedHashSet<>();
Expand Down Expand Up @@ -74,10 +79,49 @@ public List<CompanyOwnAccount> getCompanyOwnAccounts() {
return companyOwnAccounts;
}

public List<PersonInvestCompany> getPersonInvestCompanies() {
return personInvestCompanies;
}

public List<CompanyInvestCompany> getCompanyInvestCompanies() {
return companyInvestCompanies;
}

public Company scaleInvestmentRatios() {
double sum = 0;
for (PersonInvestCompany pic : personInvestCompanies) {
sum += pic.getRatio();
}
for (CompanyInvestCompany cic : companyInvestCompanies) {
sum += cic.getRatio();
}
for (PersonInvestCompany pic : personInvestCompanies) {
pic.scaleRatio(sum);
}
for (CompanyInvestCompany cic : companyInvestCompanies) {
cic.scaleRatio(sum);
}
return this;
}

public boolean hasInvestedBy(Company company) {
for (CompanyInvestCompany cic : companyInvestCompanies) {
if (cic.getFromCompany().equals(company)) {
return true;
}
}
return false;
}

public boolean hasInvestedBy(Person person) {
for (PersonInvestCompany pic : personInvestCompanies) {
if (pic.getPerson().equals(person)) {
return true;
}
}
return false;
}

public HashSet<CompanyGuaranteeCompany> getGuaranteeSrc() {
return guaranteeSrc;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public CommonTextDictionary(String filePath, String separator) {

try {
InputStreamReader inputStreamReader = new InputStreamReader(
Objects.requireNonNull(getClass().getResourceAsStream(filePath)), StandardCharsets.UTF_8);
Objects.requireNonNull(getClass().getResourceAsStream(filePath)), StandardCharsets.UTF_8);
BufferedReader dictionary = new BufferedReader(inputStreamReader);
String line;
long totalNum = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Random;
import java.util.TreeMap;
import ldbc.finbench.datagen.generation.DatagenParams;

public class PersonNameDictionary {
private final TreeMap<Long, String> personSurnames;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public class TimeDistribution {
private Map<Integer, Double> hourDistribution;
private double[] hourProbs;
private final double[] hourCumulatives;

public TimeDistribution(String hourDistributionFile) {
loadDistribution(hourDistributionFile);
hourCumulatives = new double[hourProbs.length];
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ldbc.finbench.datagen.generation.events;

import java.io.Serializable;
import java.util.List;
import java.util.Random;
import ldbc.finbench.datagen.entities.edges.CompanyInvestCompany;
import ldbc.finbench.datagen.entities.nodes.Company;
Expand All @@ -21,7 +22,26 @@ public void resetState(int seed) {
randIndex.setSeed(seed);
}

public CompanyInvestCompany companyInvest(Company investor, Company invested) {
return CompanyInvestCompany.createCompanyInvestCompany(randomFarm, investor, invested);
public List<Company> companyInvestPartition(List<Company> investors, List<Company> targets) {
Random numInvestorsRand = randomFarm.get(RandomGeneratorFarm.Aspect.NUMS_COMPANY_INVEST);
Random chooseInvestorRand = randomFarm.get(RandomGeneratorFarm.Aspect.CHOOSE_COMPANY_INVESTOR);
for (Company target : targets) {
int numInvestors = numInvestorsRand.nextInt(
DatagenParams.maxInvestors - DatagenParams.minInvestors + 1
) + DatagenParams.minInvestors;
for (int i = 0; i < numInvestors; i++) {
int index = chooseInvestorRand.nextInt(investors.size());
Company investor = investors.get(index);
if (cannotInvest(investor, target)) {
continue;
}
CompanyInvestCompany.createCompanyInvestCompany(randomFarm, investor, target);
}
}
return targets;
}

public boolean cannotInvest(Company investor, Company target) {
return (investor == target) || investor.hasInvestedBy(target) || target.hasInvestedBy(investor);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,16 +112,16 @@ private void transferSubEvent(Loan loan) {
if (actionRandom.nextDouble() < 0.5) {
if (!cannotTransfer(account, target)) {
transfers.add(
Transfer.createLoanTransferAndReturn(randomFarm, account, target,
getMultiplicityIdAndInc(account, target),
amountRandom.nextDouble() * DatagenParams.transferMaxAmount));
Transfer.createLoanTransfer(randomFarm, account, target,
getMultiplicityIdAndInc(account, target),
amountRandom.nextDouble() * DatagenParams.transferMaxAmount));
}
} else {
if (!cannotTransfer(target, account)) {
transfers.add(
Transfer.createLoanTransferAndReturn(randomFarm, target, account,
getMultiplicityIdAndInc(target, account),
amountRandom.nextDouble() * DatagenParams.transferMaxAmount));
Transfer.createLoanTransfer(randomFarm, target, account,
getMultiplicityIdAndInc(target, account),
amountRandom.nextDouble() * DatagenParams.transferMaxAmount));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ldbc.finbench.datagen.generation.events;

import java.io.Serializable;
import java.util.List;
import java.util.Random;
import ldbc.finbench.datagen.entities.edges.PersonInvestCompany;
import ldbc.finbench.datagen.entities.nodes.Company;
Expand All @@ -22,7 +23,26 @@ public void resetState(int seed) {
randIndex.setSeed(seed);
}

public PersonInvestCompany personInvest(Person person, Company company) {
return PersonInvestCompany.createPersonInvestCompany(randomFarm, person, company);
public List<Company> personInvestPartition(List<Person> investors, List<Company> targets) {
Random numInvestorsRand = randomFarm.get(RandomGeneratorFarm.Aspect.NUMS_PERSON_INVEST);
Random chooseInvestorRand = randomFarm.get(RandomGeneratorFarm.Aspect.CHOOSE_PERSON_INVESTOR);
for (Company target : targets) {
int numInvestors = numInvestorsRand.nextInt(
DatagenParams.maxInvestors - DatagenParams.minInvestors + 1
) + DatagenParams.minInvestors;
for (int i = 0; i < numInvestors; i++) {
int index = chooseInvestorRand.nextInt(investors.size());
Person investor = investors.get(index);
if (cannotInvest(investor, target)) {
continue;
}
PersonInvestCompany.createPersonInvestCompany(randomFarm, investor, target);
}
}
return targets;
}

public boolean cannotInvest(Person investor, Company target) {
return target.hasInvestedBy(investor);
}
}
Loading

0 comments on commit 262ca25

Please sign in to comment.