问题
EDIT: I have made a short question because I think this one is too long, sorry
First of all, I am a newcomer to databases, programming languages and so on, so I apologise if this question is not well posed or specific enough; any help or guidance would be much appreciated...
The context I am working with is the following: I am querying an existing database by means of its APIs in order to retrieve certain information to design my own database.
The purpose of this database is, for example, to let the user enter a gene and find out where in the organism it is over-expressed (UP) or under-expressed (DOWN), and in which experiments that type of expression has been seen.
For the time being, what I'm doing is just querying the existing database and parsing the json result to obtain for each organism part, all the genes that are over or under-expressed (and for each gene I obtain as well the experiments where that type of expression has been reported)
(In Brain)
GENE1
Experiment1 UP
Experiment2 UP
Experiment3 UP
Experiment4 DOWN
GENE2
Experiment5 DOWN
Experiment2 DOWN
Experiment3 DOWN
Experiment8 UP
Experiment9 DOWN
Different tables I think I will need are: "genes", "organs", "experiments" and "type of expression" (and "genes2experiments2organs")
Take into account that one gene can be expressed in more than one organism_part and can have different types of expression related to more than one experiment, and one experiment can comprise more than one gene (many to many relationships)
What I would like to know first is how to add relational data, and whether my attempt is going in the right direction or whether I should change the schema/idea of the database...
My first attempt is this:
###########################################
DATABASE DEFINITION
###########################################
from sqlalchemy import create_engine, Column, Integer, String, Date, ForeignKey, Table, Float
from sqlalchemy.orm import sessionmaker, relationship, backref
from sqlalchemy.ext.declarative import declarative_base
import requests
# Shared declarative base: the mapped classes and the association table below
# all register themselves on Base.metadata.
Base = declarative_base()

# Association table tying together a gene, an experiment and an organ.
# The foreign-key columns are String(45) so that they match the String(45)
# primary keys they reference; a bare String carries no length, and MySQL
# (the backend configured in setUp) cannot emit a VARCHAR column without one.
Genes2experiments2organs = Table(
    'genes2experiments2organs',
    Base.metadata,
    Column('gene_id', String(45), ForeignKey('genes.id')),
    Column('experiment_id', String(45), ForeignKey('experiments.id')),
    Column('organ_id', String(45), ForeignKey('organs.id'))
)
class Genes(Base):
    """ORM mapping for the 'genes' table; each row is identified by a string id."""

    __tablename__ = 'genes'

    # Primary key, at most 45 characters.
    id = Column(String(45), primary_key=True)

    def __init__(self, id=""):
        """Create a gene row whose primary key is *id* (empty string by default)."""
        self.id = id

    def __repr__(self):
        """Return the debug form ``<genes(id:'...')>``."""
        template = "<genes(id:'%s')>"
        return template % self.id
class Experiments(Base):
    """ORM mapping for the 'experiments' table.

    Two many-to-many relationships are declared through the
    ``genes2experiments2organs`` association table:

    * ``genes``  -- the ``Genes`` rows linked to this experiment
      (reverse side: ``Genes.experiments``).
    * ``organs`` -- the ``Organs`` rows linked to this experiment
      (reverse side: ``Organs.experiments``).
    """

    __tablename__ = 'experiments'

    # Primary key, at most 45 characters.
    id = Column(String(45), primary_key=True)

    # An experiment relates to genes and organs, not to other experiments, so
    # the relationships target those classes and the backrefs are named after
    # what they contain (experiments).
    # NOTE(review): both relationships share one three-column association
    # table, so appending through either collection fills only two of the
    # three foreign keys -- confirm whether an association object carrying
    # gene, organ, experiment and expression type would suit the data better.
    genes = relationship("Genes", secondary=Genes2experiments2organs,
                         backref="experiments")
    organs = relationship("Organs", secondary=Genes2experiments2organs,
                          backref="experiments")

    def __init__(self, id=""):
        """Create an experiment row whose primary key is *id* (empty string by default)."""
        self.id = id

    def __repr__(self):
        """Return the debug form ``<experiments(id:'...')>``."""
        return "<experiments(id:'%s')>" % (self.id)
class Organs(Base):
    """ORM mapping for the 'organs' table; each row is identified by a string id."""

    __tablename__ = 'organs'

    # Primary key, at most 45 characters.
    id = Column(String(45), primary_key=True)

    def __init__(self, id=""):
        """Create an organ row whose primary key is *id* (empty string by default)."""
        self.id = id

    def __repr__(self):
        """Return the debug form ``<organs(id:'...')>``."""
        template = "<organs(id:'%s')>"
        return template % self.id
class Expression_type(Base):
    """ORM mapping for the 'expression_type' table; rows are identified by a string id."""

    __tablename__ = 'expression_type'

    # Primary key, at most 45 characters.
    id = Column(String(45), primary_key=True)

    def __init__(self, id=""):
        """Create an expression-type row whose primary key is *id* (empty string by default)."""
        self.id = id

    def __repr__(self):
        """Return the debug form ``<expression_type(id:'...')>``."""
        template = "<expression_type(id:'%s')>"
        return template % self.id
#####################################################
INSERTING DATA
#####################################################
def setUp():
    """Create the MySQL engine and publish a session factory as the global ``Session``."""
    global Session
    db_engine = create_engine(
        'mysql://root:password@localhost/db_name?charset=utf8',
        echo=False,
        pool_recycle=3600
    )
    # Each call to Session() hands out a new session bound to this engine.
    Session = sessionmaker(bind=db_engine)
def add_data():
    """Fetch brain expression results from the GXA API and store their genes.

    The API is read in five pages of 200 rows (offsets 0 to 800). For every
    result, the gene's ``ensemblGeneId`` is stored as a ``Genes`` row. Only
    genes are stored; the related experiments and organs are not handled here.

    Requires ``setUp()`` to have been called so that the global ``Session``
    factory exists.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if a result lacks the expected ``gene``/``ensemblGeneId`` keys.
    """
    session = Session()
    try:
        for start in range(0, 1000, 200):
            response = requests.get(
                'http://www.ebi.ac.uk/gxa/api/v1',
                params={"updownInOrganism_part": "brain", "rows": 200, "start": start}
            )
            # Fail loudly on an HTTP error instead of trying to parse an error page.
            response.raise_for_status()
            # json is a method on the response; it must be called to get the dict.
            payload = response.json()
            for item in payload['results']:
                gene_id = item['gene']['ensemblGeneId']
                # merge() looks the primary key up first and only inserts when
                # it is absent, so a gene that shows up in several results (or
                # is already stored from an earlier run) is not duplicated.
                session.merge(Genes(id=gene_id))
            # Commit page by page so an error on a later page keeps earlier ones.
            session.commit()
    finally:
        # Always return the connection to the pool, even after an error.
        session.close()
# Populate the database, then read the stored genes back as a sanity check.
setUp()
add_data()

session = Session()
try:
    genes = session.query(Genes).all()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("List of genes introduced:")
    for gene in genes:
        print(gene.id)
finally:
    # Release the connection even if the query or the printing fails.
    session.close()
So, with this code I just populate the "genes" table, without taking into account the relationships that exist with the other data I will have to include in the database... What is the procedure for adding such relational data? And is there a way to avoid inserting duplicate genes, for instance, when populating the table by means of API queries?
By the way, as you can see, I didn't define all the many-to-many relationships (secondary), only those in the "genes" table, because I am not sure whether I am on the right track or completely wrong... Thank you.
回答1:
This should do what you are looking for ...
from sqlalchemy import (Column, create_engine, Integer, ForeignKey, Unicode,
Enum)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship
# Declarative base class: the mapped classes below derive from it and share its metadata.
Base = declarative_base()
class Gene(Base):
    """A gene, stored in the 'gene' table and identified by a unique name."""

    __tablename__ = 'gene'

    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True)
    # Gene name, at most 64 characters; the unique constraint rejects duplicates.
    name = Column(Unicode(64), unique=True)

    def __init__(self, name):
        """Create a gene called *name*."""
        self.name = name
class Experiment(Base):
    """An experiment, stored in the 'experiment' table; it has only an id."""

    __tablename__ = 'experiment'

    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True)
class Organ(Base):
    """An organ, stored in the 'organ' table and identified by a unique name."""

    __tablename__ = 'organ'

    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True)
    # Organ name, at most 64 characters; the unique constraint rejects duplicates.
    name = Column(Unicode(64), unique=True)

    def __init__(self, name):
        """Create an organ called *name*."""
        self.name = name
class Measurement(Base):
    """One observed expression of a gene in an organ during an experiment.

    This is the association object linking ``Experiment``, ``Gene`` and
    ``Organ``; each of those classes gets a ``measurements`` collection via
    the backrefs declared here.
    """

    __tablename__ = 'measurement'

    # Surrogate integer primary key.
    id = Column(Integer, primary_key=True)

    # Foreign keys to the three entities a measurement connects.
    experiment_id = Column(Integer, ForeignKey(Experiment.id))
    gene_id = Column(Integer, ForeignKey(Gene.id))
    organ_id = Column(Integer, ForeignKey(Organ.id))

    # Add your measured values here
    expression = Column(Enum('UP', 'DOWN'))
    # ...

    experiment = relationship(Experiment, backref='measurements')
    gene = relationship(Gene, backref='measurements')
    organ = relationship(Organ, backref='measurements')

    def __repr__(self):
        """Return ``'Experiment <id>: <gene name>, <organ name>, <expression>'``.

        Never raises for a measurement that is not fully linked yet: a missing
        gene or organ is shown as ``None``, and the raw ``experiment_id``
        column is used when no experiment object is attached.
        """
        if self.experiment is not None:
            experiment_id = self.experiment.id
        else:
            experiment_id = self.experiment_id
        gene_name = self.gene.name if self.gene is not None else None
        organ_name = self.organ.name if self.organ is not None else None
        # %s instead of %d: renders integers identically, and also tolerates
        # an id that is still None because the row has not been flushed.
        return 'Experiment %s: %s, %s, %s' % (
            experiment_id, gene_name, organ_name, self.expression)
if __name__ == '__main__':
    # In-memory SQLite database: nothing is persisted between runs.
    engine = create_engine('sqlite://')
    session = sessionmaker(engine)()
    Base.metadata.create_all(engine)

    #
    # Creating the data
    #
    x, y, z = Gene('Gene X'), Gene('Gene Y'), Gene('Gene Z')
    heart, lungs, brain = Organ('Heart'), Organ('Lungs'), Organ('Brain')
    session.add_all([x, y, z, heart, lungs, brain])
    # Commit first so genes and organs have primary keys to reference below.
    session.commit()

    def _measure(gene, organ, expression):
        """Build a Measurement that references *gene* and *organ* by id."""
        return Measurement(gene_id=gene.id, organ_id=organ.id,
                           expression=expression)

    # (gene, organ, expression) triples observed in each experiment.
    first_observations = [
        (x, heart, 'UP'),
        (x, lungs, 'UP'),
        (x, brain, 'DOWN'),
        (y, brain, 'UP'),
        (z, brain, 'DOWN'),
    ]
    second_observations = [
        (y, lungs, 'UP'),
        (y, lungs, 'UP'),
        (y, brain, 'UP'),
        (x, brain, 'UP'),
        (z, heart, 'UP'),
    ]

    experiment_1 = Experiment()
    experiment_1.measurements.extend(
        [_measure(*observation) for observation in first_observations])
    experiment_2 = Experiment()
    experiment_2.measurements.extend(
        [_measure(*observation) for observation in second_observations])
    session.add_all([experiment_1, experiment_2])
    session.commit()

    #
    # Querying the data
    #
    # Each report: heading, mapped class to query, and the filter selecting
    # the single row whose measurements are listed.
    reports = [
        ('All measurements in the first experiment', Experiment,
         Experiment.id == 1),
        ('All measurements of Gene X', Gene, Gene.name == 'Gene X'),
        ('All measurements of the brain', Organ, Organ.name == 'Brain'),
    ]
    for heading, model, criterion in reports:
        print(heading)
        owner = session.query(model).filter(criterion).one()
        for measurement in owner.measurements:
            print(measurement)
        print('')
来源:https://stackoverflow.com/questions/15899565/how-to-insert-relational-data-many-to-many-in-sql-alchemy-by-means-of-api-quer