The following 47 code examples, extracted from open source Python projects, illustrate how to use dataset.connect().
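Before the extracted examples, here is a minimal sketch of the pattern most of them share: connect, grab a table, insert, and query. The file name example.db, the table name people, and the inserted row are illustrative placeholders, not taken from any of the projects below.

import dataset

# Connect; the SQLite file is created on demand ('sqlite:///:memory:' gives a throwaway database).
db = dataset.connect('sqlite:///example.db')

# Tables and columns are created lazily on first write.
table = db['people']
table.insert(dict(name='Ada', age=36))

# Query rows back with keyword filters.
row = table.find_one(name='Ada')
print(row['age'])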
def add_root(root, db_connection_string, events=False):
    db = dataset.connect(db_connection_string)
    root_table = db['root']
    entry_table = db['entry']
    dataset_table = db['dataset']
    column_table = db['column']
    if events:
        event_table = db['event']
    root_table.upsert(root_to_record(root), ['path'])
    for entrykey in root.entries:
        entry = root[entrykey]
        entry_table.upsert(entry_to_record(entry, root), ['path'])
        for dsetkey in entry.datasets:
            dset = entry[dsetkey]
            dataset_table.upsert(dset_to_record(dset, entry), ['path'])
            for column in dset_columns_to_records(dset):
                column_table.upsert(column, ['path', 'name'])
            if events and isinstance(dset, bark.EventData):
                for event in events_to_records(dset):
                    event_table.upsert(event, ['path', 'index'])
def createTable(tab):
    tableName = tab.tableName
    columns = tab.m
    db = dataset.connect('sqlite:///olidata.db')
    db.query('DROP table IF EXISTS ' + tableName)
    table = db.create_table(tableName)
    for row in columns:
        if (row[2] == 'int'):
            table.create_column(row[0], sqlalchemy.Integer)
        elif (row[2] == 'varchar'):
            table.create_column(row[0], sqlalchemy.VARCHAR(255))
    cols = ', '.join(columns[:, 0])
    db.query('DROP INDEX IF EXISTS unique_name')
    db.query('create unique index unique_name on ' + tableName + '(' + cols + ')')
    print(db[tableName].columns)
def parse_audit_files():
    audit_paths = glob.glob('audit/workflow_mmlinear_started_20151026_17*')
    audit_data = []
    for path in audit_paths:
        with open(path) as fh:
            cp = ConfigParser()
            cp.readfp(fh)
            for sec in cp.sections():
                dat = dict(cp.items(sec))
                if 'train_lin' in dat['instance_name']:
                    ms = re.match('train_lin_trn([0-9]+|rest)_tst([0-9]+)_c([0-9\.]+)', dat['instance_name'])
                    if ms is None:
                        raise Exception('No match in name: ' + dat['instance_name'])
                    m = ms.groups()
                    dat['training_size'] = m[0]
                    dat['test_size'] = m[1]
                    dat['cost'] = m[2]
                    audit_data.append(dat)
    db = dataset.connect('sqlite:///:memory:')
    tbl = db['audit']
    for d in audit_data:
        tbl.insert(d)
    return tbl
def setup_db(connection_string):
    db = dataset.connect(connection_string)
    connections = db['connection']
    users = db['user']
    tweets = db['tweet']
    medias = db['media']
    mentions = db['mention']
    urls = db['url']
    hashtags = db['hashtag']
    tweets.create_index(['tweet_id'])
    medias.create_index(['tweet_id'])
    mentions.create_index(['user_id'])
    mentions.create_index(['mentioned_user_id'])
    urls.create_index(['tweet_id'])
    hashtags.create_index(['tweet_id'])
    users.create_index(['user_id'])
    connections.create_index(['friend_id'])
    connections.create_index(['follower_id'])
    return db
def setup_db(connection_string):
    db = dataset.connect(connection_string)
    pages = db['page']
    users = db['user']
    posts = db['post']
    comments = db['comment']
    interactions = db['interaction']
    users.create_index(['user_id'])
    posts.create_index(['post_id'])
    comments.create_index(['comment_id'])
    comments.create_index(['post_id'])
    interactions.create_index(['comment_id'])
    interactions.create_index(['post_id'])
    interactions.create_index(['user_id'])
    return db
def subscribe(user_id):
    """ subscribe to the database """
    db = dataset.connect(database_url)
    table = db['subscriptions']
    if table.find_one(id_user=user_id) is None:
        table.insert(dict(id_user=user_id))
        text_ = """This thread has successfully subscribed to receive New Uber Codes! \nI will send you the latest Uber Promo Codes when they get released so that you can apply them first!\n\n"""
        text_ += """These are the latest codes right now\n\n"""
        new_codes = get_code()
        for key in new_codes:
            text_ += "<b>" + key + "</b> | Expires - " + new_codes[key][0] + " | " + new_codes[key][1]
            text_ += "\n"
        return text_
    else:
        return "This thread is already subscribed to receive New Uber Codes!"
def get_new_codes():
    """ Return New Codes and Refresh DB """
    db = dataset.connect(database_url)
    new_codes = get_code()
    table = db['promo']
    """ Get New Codes """
    new = {}
    for key, value in new_codes.items():
        if table.find_one(promo=key) is None:
            new[key] = [new_codes[key][0], new_codes[key][1]]
        else:
            pass
    """ Add to DB """
    for key in new:
        table.insert(dict(promo=key, desc=new_codes[key][1], exp=new_codes[key][0]))
    return new
def get_new_breakdowns():
    """ Return Breakdown Notifications and Refresh DB """
    db = dataset.connect(database_url)
    new_breakdown = all_breakdowns()
    table = db['train']
    """ Get New Breakdowns """
    new = []
    for key, value in new_breakdown.items():
        if table.find_one(tweet_id=key) is None:
            new.append(key)
        else:
            pass
    """ Add to DB """
    for key in new:
        table.insert(dict(tweet_id=key,
                          tweet=new_breakdown[key]['tweet'],
                          created_at=new_breakdown[key]['created_at']))
    return new
def __init__(self):
    self.db = dataset.connect('sqlite:///:memory:')
def incr_database(table=None, name=None):
    db = dataset.connect('sqlite:///db' + hash)
    if table and name:
        table = db[table]
        r = table.find_one(name=name)
        if r == None:
            table.insert(dict(name=name, value=0))
            r = table.find_one(name=name)
        new = r['value'] + 1
        a = dict(name=name, value=new)
        table.update(a, ['name'])
        return new
    return False
def get_user_lang(self):
    db = dataset.connect('sqlite:///db' + hash)
    table = db['user:' + str(self.bot_type) + str(self.chat_id)]
    r = table.find_one(info='lang')
    if r == None:
        table.insert(dict(info='lang', value=defaut_lang))
        return defaut_lang
    return r['value']
def update_user_lang(self, new_lag):
    if get_user_lang(self) == new_lag:
        return new_lag
    else:
        db = dataset.connect('sqlite:///db' + hash)
        table = db['user:' + str(self.bot_type) + str(self.chat_id)]
        a = dict(info='lang', value=new_lag)
        table.update(a, ['info'])
        return new_lag
def get_status(input):
    db = dataset.connect('sqlite:///db' + hash)
    table = db['status']
    r = table.find_one(name=input)
    if r == None:
        return str(input + ': 0')
    i = r['value']
    output = str(input + ': ' + str(i))
    return output
def connect_to_db(): """Connecting to DB""" db = dataset.connect('sqlite:///../../database/smells.sqlite') return db
def connect_to_db(): """ Connect to an SQLite database. Return a connection.""" db = dataset.connect('sqlite:///../database/smells.sqlite') return db
def main():
    db = dataset.connect('sqlite:///../database/smells.sqlite')
    sqlite_file = '/home/jen/projects/smelly_london/git/smelly_london/database'
    column_names = ['category', 'location', 'number_of_smells', 'centroid_lat', 'centroid_lon', 'id', 'year', 'sentence']
    sql = 'select {column_names} from (select Category category, Borough location, Id id, Year year, Sentence sentence, count(*) number_of_smells from smells group by Category, Borough having Year = "1904") join locations on location = name;'
    conn, cur = connect_to_sqlite_db(sqlite_file)
    data = sql_get_data_colnames(cur, sql, column_names)
    close_sqlite_connection(conn)
    return data
def save_to_database(self, results):
    """Save results to the database."""
    db = dataset.connect('sqlite:///../database/smells.sqlite')
    table = db['smells']
    for result in results:
        table.insert({'Category': result.category,
                      'Borough': result.borough,
                      'Year': result.year,
                      'Sentence': result.sentence,
                      'bID': result.bID,
                      'URL': result.url,
                      'MOH': result.mohRegion})
def __enter__(self):
    self.db = dataset.connect(pyconfig.get('db_url'))
    self.db.__enter__()
    self.table = self.db.get_table(self.table_name, **self.kwargs)
    logger.debug(
        "BeardDBTable initialised with: self.table: {}, self.db: {}".format(
            self.table, self.db))
    return self
def get_all_keys():
    with dataset.connect(config.db_name) as db:
        table = db['keys']
        return table.all()
def make_key(chat_id):
    """Make key."""
    with dataset.connect(config.db_name) as db:
        table = db['keys']
        return table.insert(
            dict(
                chat_id=chat_id,
                key="".join(
                    (random.choice(string.ascii_letters) for x in range(20))
                )
            )
        )
def get_key(chat_id):
    """Get key.

    If key exists, get key. If key does not exist, create key and get it.
    """
    with dataset.connect(config.db_name) as db:
        table = db['keys']
        existing_key = table.find_one(chat_id=chat_id)
        if existing_key:
            return existing_key
        else:
            make_key(chat_id)
            return get_key(chat_id)
def is_key_match(key):
    logger.debug("Key is: {}".format(key))
    with dataset.connect(config.db_name) as db:
        table = db['keys']
        if table.find_one(key=key):
            return True
def __init__(self, db_path):
    if db_path is None:
        self.engine = dataset.connect('sqlite:///:memory:')
    else:
        self.engine = dataset.connect('sqlite:///%s' % db_path)
    self.table = self.engine['files']
    self.table.delete()
def getDbStartEnd(start, end):
    db = dataset.connect('sqlite:///../2_createAndFillDatabase/olidata.db')
    startLin = str(toLinuxTime(start, '%Y/%m/%d %H:%M'))
    endLin = str(toLinuxTime(end, '%Y/%m/%d %H:%M'))
    return [db, startLin, endLin]
def openDB(databaseFilePath=None):
    if ((databaseFilePath == None) or (not (os.path.isfile(databaseFilePath)))):
        databaseFilePath = settings.getDBFilePath()
    databaseURL = 'sqlite:///{}'.format(databaseFilePath)
    return dataset.connect(databaseURL)
def setup_sqlite(db_url, collection):
    """ create required tables & indexes """
    if collection == 'twitter':
        db = dataset.connect(db_url)
        connections = db['connection']
        users = db['user']
        tweets = db['tweet']
        medias = db['media']
        mentions = db['mention']
        urls = db['url']
        hashtags = db['hashtag']
        tweets.create_index(['tweet_id'])
        medias.create_index(['tweet_id'])
        mentions.create_index(['user_id'])
        mentions.create_index(['mentioned_user_id'])
        urls.create_index(['tweet_id'])
        hashtags.create_index(['tweet_id'])
        users.create_index(['user_id'])
        connections.create_index(['friend_id'])
        connections.create_index(['follower_id'])
        return db
def __init__(self, db_path):
    db = dataset.connect('sqlite:///%s' % db_path)
    self.tables = {
        "by_event": db["by_event"],
        "by_person": db["by_person"]
    }
def __init__(self, url, result_table="results", complementary_table="complementary", space_table="space"):
    super(SQLiteConnection, self).__init__()
    if url.endswith("/"):
        raise RuntimeError("Empty database name {}".format(url))
    if url.endswith((" ", "\t")):
        raise RuntimeError("Database name ends with space {}".format(url))
    if not url.startswith("sqlite://"):
        raise RuntimeError("Missing 'sqlite:///' at the beginning of url {}".format(url))
    if url == "sqlite://" or url == "sqlite:///:memory:":
        raise RuntimeError("Cannot use memory database as it exists only for the time of the connection")
    match = re.search("sqlite:///(.*)", url)
    if match is not None:
        db_path = match.group(1)
    else:
        raise RuntimeError("Cannot find sqlite db path in {}".format(url))
    self.url = url
    self.result_table_name = result_table
    self.complementary_table_name = complementary_table
    self.space_table_name = space_table
    self._lock = filelock.FileLock("{}.lock".format(db_path))
    self.hold_lock = False
    # with self.lock():
    #     db = dataset.connect(self.url)
    #     # Initialize a result table and ensure float for loss
    #     results = db[self.result_table_name]
    #     results.create_column("_loss", sqlalchemy.Float)
def all_results(self):
    """Get a list of all entries of the result table. The order is undefined."""
    # Only way to ensure old db instances are closed is to force garbage collection
    # See dataset note: https://dataset.readthedocs.io/en/latest/api.html#notes
    gc.collect()
    db = dataset.connect(self.url)
    return list(db[self.result_table_name].all())
def find_results(self, filter):
    """Get a list of all results associated with *filter*. The order is undefined."""
    gc.collect()
    db = dataset.connect(self.url)
    return list(db[self.result_table_name].find(**filter))
def insert_result(self, document):
    """Insert a new *document* in the result table.

    The columns must not be defined nor all present. Any new column will be
    added to the database and any missing column will get value None.
    """
    gc.collect()
    db = dataset.connect(self.url)
    return db[self.result_table_name].insert(document)
def update_result(self, filter, values):
    """Update or add *values* of given rows in the result table.

    Args:
        filter: An identifier of the rows to update.
        values: A mapping of values to update or add.
    """
    gc.collect()
    filter = filter.copy()
    keys = list(filter.keys())
    filter.update(values)
    db = dataset.connect(self.url)
    return db[self.result_table_name].update(filter, keys)
def all_complementary(self):
    """Get all entries of the complementary information table as a list. The order is undefined."""
    gc.collect()
    db = dataset.connect(self.url)
    return list(db[self.complementary_table_name].all())
def insert_complementary(self, document):
    """Insert a new document (row) in the complementary information table."""
    gc.collect()
    db = dataset.connect(self.url)
    return db[self.complementary_table_name].insert(document)
def find_complementary(self, filter):
    """Find a document (row) from the complementary information table."""
    gc.collect()
    db = dataset.connect(self.url)
    return db[self.complementary_table_name].find_one(**filter)
def get_space(self):
    """Returns the space used for previous experiments.

    Raises:
        AssertionError: If there are more than one space in the database.
    """
    gc.collect()
    db = dataset.connect(self.url)
    entry_count = db[self.space_table_name].count()
    if entry_count == 0:
        return None
    assert entry_count == 1, "Space table unexpectedly contains more than one space."
    return pickle.loads(db[self.space_table_name].find_one()["space"])
def insert_space(self, space):
    """Insert a space in the database.

    Raises:
        AssertionError: If a space is already present in the database.
    """
    gc.collect()
    db = dataset.connect(self.url)
    assert db[self.space_table_name].count() == 0, ("Space table cannot contain more than one space, "
                                                    "clear table first.")
    return db[self.space_table_name].insert({"space": pickle.dumps(space)})
def run(consumer_key, consumer_secret, access_key, access_secret, connection_string):
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)
    user_table = db['user']
    users = user_table.find(user_table.table.columns.user_id != 0, profile_collected=0)
    users = [u for u in users]
    if len(users) == 0:
        print('No users without profiles')
        return None
    ids_to_lookup = []
    for user in users:
        ids_to_lookup.append(user['user_id'])
        if len(ids_to_lookup) == 100:
            print('Getting profiles')
            profiles = get_profiles(api, user_ids=ids_to_lookup)
            print('Updating 100 profiles')
            upsert_profiles(db, profiles)
            ids_to_lookup = []
            print('Sleeping, timestamp: ' + str(datetime.now()))
            time.sleep(5)
    print('Getting profiles')
    profiles = get_profiles(api, user_ids=ids_to_lookup)
    print('Updating ' + str(len(ids_to_lookup)) + ' profiles')
    upsert_profiles(db, profiles)
    print('Finished getting profiles')
def run(consumer_key, consumer_secret, access_key, access_secret, connection_string, threshold=5000, seed_only=True):
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)
    if seed_only:
        is_seed = 1
    else:
        is_seed = 0
    user_table = db['user']
    users = user_table.find(user_table.table.columns.friends_count < threshold,
                            friends_collected=0, is_seed=is_seed)
    users = [u for u in users]
    all_users = len(users)
    remaining = all_users
    for u in users:
        try:
            print('Getting friend ids for ' + u['screen_name'])
            next, prev, friend_ids = get_friend_ids(
                api, screen_name=u['screen_name'])
            print('Adding ' + str(len(friend_ids)) + ' user ids to db')
            insert_if_missing(db, user_ids=friend_ids)
            print('Creating relationships for ' + str(u['user_id']))
            create_connections(db, u['user_id'], friend_ids=friend_ids)
            update_dict = dict(id=u['id'], friends_collected=1)
            user_table.update(update_dict, ['id'])
            # Can only make 15 calls in a 15 minute window to this endpoint
            remaining -= 1
            time_left = remaining / 60.0
            print(str(time_left) + ' hours to go')
            print('Sleeping for 1 minute, timestamp: ' + str(datetime.now()))
            time.sleep(60)
        except:
            continue
def _initDb(self):
    db = dataset.connect(config.log_db)
    l.info('{} db backend connected.'.format(db.engine.name))
    return db
def write_hyperparams(log_dir, params, mode='FILE'):
    if mode == 'FILE' or mode == 'BOTH':
        os.makedirs(log_dir)
        hyperparam_file = os.path.join(log_dir, 'hyperparams.json')
        with open(hyperparam_file, 'w') as f:
            f.write(json.dumps(params))
    if mode == 'DATABASE' or mode == 'BOTH':
        db = dt.connect(constants.DATABASE_CONNECTION_STRING)
        runs_table = db['runs']
        runs_table.insert(params)
    if mode not in ('FILE', 'DATABASE', 'BOTH'):
        raise ValueError('{} mode not recognized. Try with FILE, DATABASE or BOTH'.format(mode))
def update_in_db(datadict):
    db = dt.connect(constants.DATABASE_CONNECTION_STRING)
    runs_table = db['runs']
    runs_table.update(datadict, keys=['hash'])
def cli(argv):
    # Prepare conf dict
    conf = helpers.get_variables(config, str.isupper)
    # Prepare conn dict
    conn = {
        'warehouse': dataset.connect(config.WAREHOUSE_URL),
    }
    # Get and call collector
    collect = importlib.import_module('collectors.%s' % argv[1]).collect
    collect(conf, conn, *argv[2:])
def open_spider(self, spider):
    if spider.conf and spider.conn:
        self.__conf = spider.conf
        self.__conn = spider.conn
    else:
        # For runs triggered by the scrapy CLI utility
        self.__conf = helpers.get_variables(config, str.isupper)
        self.__conn = {'warehouse': dataset.connect(config.WAREHOUSE_URL)}
def conn():
    warehouse = dataset.connect(config.WAREHOUSE_URL)
    for table in warehouse.tables:
        warehouse[table].delete()
    return {'warehouse': warehouse}
def clear_db():
    """ Delete all users and promos """
    db = dataset.connect(database_url)
    db['subscriptions'].drop()
    db['promo'].drop()
def get_all_users():
    """ get all users """
    db = dataset.connect(database_url)
    output = []
    for user in db['subscriptions']:
        output.append(user['id_user'])
    return output