pydoris-client 1.0.4

Last updated:

0 purchases

pydoris-client 1.0.4 Image
pydoris-client 1.0.4 Images
Add to Cart

Description:

pydoris-client 1.0.4

Apache Doris Python Client
An Apache Doris client for the Python programming language.
Apache Doris is a high-performance, real-time analytical database based on an MPP architecture, known for its extreme speed and ease of use. It returns query results over massive datasets with sub-second response times, and it supports both high-concurrency point queries and high-throughput complex analysis. All this makes Apache Doris an ideal tool for scenarios including report analysis, ad-hoc queries, unified data warehousing, and data lake query acceleration. On Apache Doris, users can build various applications, such as user behavior analysis, A/B testing platforms, log retrieval analysis, user profile analysis, and order analysis.
Installation
pip install pydoris-client

DorisClient Usage
from pydoris.doris_client import *
from pydoris.util.generate_test_data import *

# Connection settings shared by every example below; adjust them to match
# your own Doris deployment.
fe_host = "127.0.0.1"
fe_http_port = "8040"
fe_query_port = "9030"
username = 'root'
passwd = ""
db = "test"

# Single module-level client reused by all of the test_* functions.
doris_client = DorisClient(
    fe_host=fe_host,
    fe_http_port=fe_http_port,
    fe_query_port=fe_query_port,
    username=username,
    password=passwd,
    db=db,
)


def test_create_database():
    """Create the ``pydoris_client_test`` database.

    Returns whatever ``doris_client.create_database`` returns.
    """
    database_name = 'pydoris_client_test'
    outcome = doris_client.create_database(database_name)
    return outcome


def test_create_table():
    """Create ``pydoris_client_test.write_test`` if it does not already exist.

    The table is a duplicate-key table keyed and hash-distributed on
    ``f_id`` (one bucket), with one column per value produced by the data
    generator used in the write examples.
    """
    # The DDL text is sent to the server verbatim, so it is kept exactly as is.
    ddl = """create table if not exists pydoris_client_test.write_test(
f_id int,
f_decimal decimal(18,6),
f_timestamp bigint,
f_datetime datetime(6),
f_str string,
f_float float,
f_boolean boolean
)duplicate key(`f_id`)
distributed by hash(`f_id`) buckets 1
properties("replication_allocation" = "tag.location.default: 1");"""
    doris_client.execute(ddl)


def test_get_table_columns():
    """Print the columns the client reports for ``pydoris_client_test.write_test``."""
    columns = doris_client.get_table_columns('pydoris_client_test', 'write_test')
    print(columns)


def gen_test_data(num):
    """Build ``num`` rows of generated test data.

    Each row is a 7-tuple in the column order of the ``write_test`` table:
    ``(i, decimal, timestamp, datetime, string, float, boolean)``, where ``i``
    is the zero-based row index and the remaining fields come from the
    ``generate_*`` helpers.

    Args:
        num: Number of rows to generate; ``0`` yields an empty list.

    Returns:
        A list of ``num`` tuples.
    """
    # Bounds passed to generate_random_datetime (presumably the range the
    # random datetime is drawn from -- confirm against the helper).
    start_date = datetime(2023, 1, 1)
    end_date = datetime(2023, 12, 31)
    # A comprehension replaces the append loop and avoids naming the result
    # ``list``, which shadowed the builtin.
    return [
        (
            i,
            generate_decimal(),
            generate_timestamp(),
            generate_random_datetime(start_date, end_date),
            generate_random_string(20),
            generate_float(),
            generate_boolean(),
        )
        for i in range(num)
    ]


# If your data needs a specific line delimiter, use options.set_line_delimiter(delimiter).
def test_write_csv():
    """Write 100000 generated rows to ``pydoris_client_test.write_test`` as CSV.

    The rows are serialized without a header line or index column.
    """
    column_names = ['f_id', 'f_decimal', 'f_timestamp', 'f_datetime', 'f_str', 'f_float', 'f_boolean']
    frame = pd.DataFrame(gen_test_data(100000))
    frame.columns = column_names
    # Optional tuning example (left disabled):
    # doris_client.options.set_csv_format(",").set_auto_uuid_label().set_line_delimiter("\\n")
    payload = frame.to_csv(header=False, index=False)
    doris_client.write("pydoris_client_test.write_test", payload)


# 1. To insert data into Doris in JSON format, you must set the JSON format explicitly,
#    because the default format is CSV.
# 2. When your JSON data is an array such as [{},{}], set strip_outer_array=true.
# 3. You can customize the data import label with options.set_label(your_label).
def test_write_json():
    """Write 100000 generated rows to ``pydoris_client_test.write_test`` as JSON.

    The frame is serialized as a JSON array of records, so the write options
    select the JSON format and set ``strip_outer_array`` to ``"true"``.
    """
    column_names = ['f_id', 'f_decimal', 'f_timestamp', 'f_datetime', 'f_str', 'f_float', 'f_boolean']
    frame = pd.DataFrame(gen_test_data(100000), columns=column_names)
    payload = frame.to_json(orient='records')

    write_options = WriteOptions()
    write_options.set_json_format()
    write_options.set_option("strip_outer_array", "true")

    doris_client.write("pydoris_client_test.write_test", payload, options=write_options)


# The data_df (pd.DataFrame), table_name (str) and table_model (str) arguments are required.
# Be careful: when repeat_replacement = True, an existing table with the same name will be deleted.
def test_write_from_df():
    """Write 100000 generated rows to ``pydoris_client_test.df_write_test`` from a DataFrame.

    Uses the ``"UNIQUE"`` table model, maps ``f_decimal`` to
    ``Decimal(18,6)``, and passes ``repeat_replacement=False``.
    """
    column_names = ['f_id', 'f_decimal', 'f_timestamp', 'f_datetime', 'f_str', 'f_float', 'f_boolean']
    frame = pd.DataFrame(gen_test_data(100000), columns=column_names)

    target_table = "pydoris_client_test.df_write_test"
    # Fourth positional argument: presumably the key columns for the
    # table model -- confirm against the write_from_df signature.
    key_columns = ['f_id']
    type_overrides = [("f_decimal", "Decimal(18,6)")]
    properties = {"replication_allocation": "tag.location.default: 1"}

    doris_client.write_from_df(
        frame,
        target_table,
        "UNIQUE",
        key_columns,
        distributed_hash_key=["f_id"],
        buckets=1,
        field_mapping=type_overrides,
        table_properties=properties,
        repeat_replacement=False,
    )


def test_read_to_df():
    """Query up to 1000 rows of ``write_test`` into a DataFrame and print it in full."""
    sql = "select * from pydoris_client_test.write_test limit 1000"
    column_names = ['f_id', 'f_decimal', 'f_timestamp', 'f_datetime',
                    'f_str', 'f_float', 'f_boolean']
    frame = doris_client.query_to_dataframe(sql, column_names)
    # Temporarily lift pandas' display limits so no rows are elided and wide
    # frames are not wrapped.
    with pd.option_context('expand_frame_repr', False, 'display.max_rows', None):
        print(frame)


def test_query():
    """Run a full-table query on ``write_test`` and print its duration and size.

    Prints the elapsed wall time in (fractional) seconds, followed by the
    length of the result returned by ``doris_client.query``.
    """
    # Local import keeps this example self-contained.
    import time

    # perf_counter is monotonic and high-resolution. The previous
    # ``(end - start).seconds`` reported only the whole-seconds component of
    # the timedelta, so sub-second queries printed 0.
    started = time.perf_counter()
    result = doris_client.query("select * from pydoris_client_test.write_test")
    elapsed_seconds = time.perf_counter() - started
    print(elapsed_seconds)
    print(len(result))


def test_list_tables():
    """Print the tables the client lists for the ``pydoris_client_test`` database."""
    print(doris_client.list_tables("pydoris_client_test"))


def test_drop_table():
    """Drop both example tables, printing the table list before and after."""
    database = 'pydoris_client_test'
    print(doris_client.list_tables(database))
    # Same drop order as the tables were created by the write examples.
    for table_name in ('write_test', 'df_write_test'):
        doris_client.drop_table(database, table_name)
    print(doris_client.list_tables(database))

if __name__ == '__main__':
    # Run the full walkthrough in order: set up the database and table,
    # write data three ways, read it back, then clean up.
    walkthrough = (
        test_create_database,
        test_create_table,
        test_get_table_columns,
        test_write_csv,
        test_write_json,
        test_write_from_df,
        test_read_to_df,
        test_query,
        test_list_tables,
        test_drop_table,
    )
    for step in walkthrough:
        step()

License:

For personal and professional use. You cannot resell or redistribute these repositories in their original state.

Customer Reviews

There are no reviews.